10b57cec5SDimitry Andric //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This pass performs loop invariant code motion, attempting to remove as much 100b57cec5SDimitry Andric // code from the body of a loop as possible. It does this by either hoisting 110b57cec5SDimitry Andric // code into the preheader block, or by sinking code to the exit blocks if it is 120b57cec5SDimitry Andric // safe. This pass also promotes must-aliased memory locations in the loop to 130b57cec5SDimitry Andric // live in registers, thus hoisting and sinking "invariant" loads and stores. 140b57cec5SDimitry Andric // 15e8d8bef9SDimitry Andric // Hoisting operations out of loops is a canonicalization transform. It 16e8d8bef9SDimitry Andric // enables and simplifies subsequent optimizations in the middle-end. 17e8d8bef9SDimitry Andric // Rematerialization of hoisted instructions to reduce register pressure is the 18e8d8bef9SDimitry Andric // responsibility of the back-end, which has more accurate information about 19e8d8bef9SDimitry Andric // register pressure and also handles other optimizations than LICM that 20e8d8bef9SDimitry Andric // increase live-ranges. 21e8d8bef9SDimitry Andric // 220b57cec5SDimitry Andric // This pass uses alias analysis for two purposes: 230b57cec5SDimitry Andric // 240b57cec5SDimitry Andric // 1. Moving loop invariant loads and calls out of loops. 
If we can determine 250b57cec5SDimitry Andric // that a load or call inside of a loop never aliases anything stored to, 260b57cec5SDimitry Andric // we can hoist it or sink it like any other instruction. 270b57cec5SDimitry Andric // 2. Scalar Promotion of Memory - If there is a store instruction inside of 280b57cec5SDimitry Andric // the loop, we try to move the store to happen AFTER the loop instead of 290b57cec5SDimitry Andric // inside of the loop. This can only happen if a few conditions are true: 300b57cec5SDimitry Andric // A. The pointer stored through is loop invariant 310b57cec5SDimitry Andric // B. There are no stores or loads in the loop which _may_ alias the 320b57cec5SDimitry Andric // pointer. There are no calls in the loop which mod/ref the pointer. 330b57cec5SDimitry Andric // If these conditions are true, we can promote the loads and stores in the 340b57cec5SDimitry Andric // loop of the pointer to use a temporary alloca'd variable. We then use 350b57cec5SDimitry Andric // the SSAUpdater to construct the appropriate SSA form for the value. 
360b57cec5SDimitry Andric // 370b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/LICM.h" 4081ad6265SDimitry Andric #include "llvm/ADT/PriorityWorklist.h" 410b57cec5SDimitry Andric #include "llvm/ADT/SetOperations.h" 420b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 430b57cec5SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h" 440b57cec5SDimitry Andric #include "llvm/Analysis/AliasSetTracker.h" 45bdd1243dSDimitry Andric #include "llvm/Analysis/AssumptionCache.h" 460b57cec5SDimitry Andric #include "llvm/Analysis/CaptureTracking.h" 470b57cec5SDimitry Andric #include "llvm/Analysis/GuardUtils.h" 48e8d8bef9SDimitry Andric #include "llvm/Analysis/LazyBlockFrequencyInfo.h" 490b57cec5SDimitry Andric #include "llvm/Analysis/Loads.h" 500b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 510b57cec5SDimitry Andric #include "llvm/Analysis/LoopIterator.h" 5281ad6265SDimitry Andric #include "llvm/Analysis/LoopNestAnalysis.h" 530b57cec5SDimitry Andric #include "llvm/Analysis/LoopPass.h" 540b57cec5SDimitry Andric #include "llvm/Analysis/MemorySSA.h" 550b57cec5SDimitry Andric #include "llvm/Analysis/MemorySSAUpdater.h" 565ffd83dbSDimitry Andric #include "llvm/Analysis/MustExecute.h" 570b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 580b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 590b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h" 6081ad6265SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 610b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 620b57cec5SDimitry Andric #include "llvm/IR/CFG.h" 630b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 640b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h" 650b57cec5SDimitry Andric #include "llvm/IR/DebugInfoMetadata.h" 660b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h" 
670b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 680b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 690b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 7006c3fb27SDimitry Andric #include "llvm/IR/IRBuilder.h" 710b57cec5SDimitry Andric #include "llvm/IR/LLVMContext.h" 720b57cec5SDimitry Andric #include "llvm/IR/Metadata.h" 730b57cec5SDimitry Andric #include "llvm/IR/PatternMatch.h" 740b57cec5SDimitry Andric #include "llvm/IR/PredIteratorCache.h" 75480093f4SDimitry Andric #include "llvm/InitializePasses.h" 760b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 770b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 780b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 79bdd1243dSDimitry Andric #include "llvm/Target/TargetOptions.h" 800b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h" 815ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" 820b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 830b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Local.h" 840b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h" 850b57cec5SDimitry Andric #include "llvm/Transforms/Utils/SSAUpdater.h" 860b57cec5SDimitry Andric #include <algorithm> 870b57cec5SDimitry Andric #include <utility> 880b57cec5SDimitry Andric using namespace llvm; 890b57cec5SDimitry Andric 9081ad6265SDimitry Andric namespace llvm { 9181ad6265SDimitry Andric class LPMUpdater; 9281ad6265SDimitry Andric } // namespace llvm 9381ad6265SDimitry Andric 940b57cec5SDimitry Andric #define DEBUG_TYPE "licm" 950b57cec5SDimitry Andric 960b57cec5SDimitry Andric STATISTIC(NumCreatedBlocks, "Number of blocks created"); 970b57cec5SDimitry Andric STATISTIC(NumClonedBranches, "Number of branches cloned"); 980b57cec5SDimitry Andric STATISTIC(NumSunk, "Number of instructions sunk out of loop"); 990b57cec5SDimitry Andric STATISTIC(NumHoisted, "Number of instructions hoisted out of loop"); 1000b57cec5SDimitry Andric 
// NOTE(review): this file appears to be a `git blame`-style dump of
// llvm/lib/Transforms/Scalar/LICM.cpp — per-line revision hashes and the
// author name are fused into the text and the original line breaks are lost,
// so the text below is not compilable C++. It is reproduced verbatim; restore
// the clean upstream source before making any code change.
// This span holds the pass's STATISTIC counters (loads/calls hoisted or sunk,
// promotion candidates and load/load-store promotions, and the min-max / GEP /
// add-sub / FP / int reassociation-hoist counters).
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk"); 1010b57cec5SDimitry Andric STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk"); 102bdd1243dSDimitry Andric STATISTIC(NumPromotionCandidates, "Number of promotion candidates"); 103bdd1243dSDimitry Andric STATISTIC(NumLoadPromoted, "Number of load-only promotions"); 104bdd1243dSDimitry Andric STATISTIC(NumLoadStorePromoted, "Number of load and store promotions"); 10506c3fb27SDimitry Andric STATISTIC(NumMinMaxHoisted, 10606c3fb27SDimitry Andric "Number of min/max expressions hoisted out of the loop"); 10706c3fb27SDimitry Andric STATISTIC(NumGEPsHoisted, 10806c3fb27SDimitry Andric "Number of geps reassociated and hoisted out of the loop"); 10906c3fb27SDimitry Andric STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated " 11006c3fb27SDimitry Andric "and hoisted out of the loop"); 1115f757f3fSDimitry Andric STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions " 1125f757f3fSDimitry Andric "reassociated and hoisted out of the loop"); 1130fca6ea1SDimitry Andric STATISTIC(NumIntAssociationsHoisted, 1140fca6ea1SDimitry Andric "Number of invariant int expressions " 1150fca6ea1SDimitry Andric "reassociated and hoisted out of the loop"); 1160b57cec5SDimitry Andric 1170b57cec5SDimitry Andric /// Memory promotion is enabled by default. 
// NOTE(review): git-blame metadata is interleaved with the code below and the
// original line breaks are lost; the text is reproduced verbatim (not
// compilable as-is). This span holds LICM's command-line options —
// disable-licm-promotion, licm-control-flow-hoisting,
// licm-force-thread-model-single, licm-max-num-uses-traversed, the FP/int
// reassociation caps, and the two MemorySSA caps (licm-mssa-optimization-cap,
// licm-mssa-max-acc-promotion) — followed by the static forward declarations
// of the sink/hoist helper functions.
1180b57cec5SDimitry Andric static cl::opt<bool> 1190b57cec5SDimitry Andric DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), 1200b57cec5SDimitry Andric cl::desc("Disable memory promotion in LICM pass")); 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric static cl::opt<bool> ControlFlowHoisting( 1230b57cec5SDimitry Andric "licm-control-flow-hoisting", cl::Hidden, cl::init(false), 1240b57cec5SDimitry Andric cl::desc("Enable control flow (and PHI) hoisting in LICM")); 1250b57cec5SDimitry Andric 126bdd1243dSDimitry Andric static cl::opt<bool> 127bdd1243dSDimitry Andric SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(false), 128bdd1243dSDimitry Andric cl::desc("Force thread model single in LICM pass")); 129bdd1243dSDimitry Andric 1300b57cec5SDimitry Andric static cl::opt<uint32_t> MaxNumUsesTraversed( 1310b57cec5SDimitry Andric "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), 1320b57cec5SDimitry Andric cl::desc("Max num uses visited for identifying load " 1330b57cec5SDimitry Andric "invariance in loop using invariant start (default = 8)")); 1340b57cec5SDimitry Andric 1355f757f3fSDimitry Andric static cl::opt<unsigned> FPAssociationUpperLimit( 1365f757f3fSDimitry Andric "licm-max-num-fp-reassociations", cl::init(5U), cl::Hidden, 1375f757f3fSDimitry Andric cl::desc( 1385f757f3fSDimitry Andric "Set upper limit for the number of transformations performed " 1395f757f3fSDimitry Andric "during a single round of hoisting the reassociated expressions.")); 1405f757f3fSDimitry Andric 1410fca6ea1SDimitry Andric cl::opt<unsigned> IntAssociationUpperLimit( 1420fca6ea1SDimitry Andric "licm-max-num-int-reassociations", cl::init(5U), cl::Hidden, 1430fca6ea1SDimitry Andric cl::desc( 1440fca6ea1SDimitry Andric "Set upper limit for the number of transformations performed " 1450fca6ea1SDimitry Andric "during a single round of hoisting the reassociated expressions.")); 1460fca6ea1SDimitry Andric 1470b57cec5SDimitry Andric // Experimental 
option to allow imprecision in LICM in pathological cases, in 1480b57cec5SDimitry Andric // exchange for faster compile. This is to be removed if MemorySSA starts to 14981ad6265SDimitry Andric // address the same issue. LICM calls MemorySSAWalker's 1500b57cec5SDimitry Andric // getClobberingMemoryAccess, up to the value of the Cap, getting perfect 1510b57cec5SDimitry Andric // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess, 1520b57cec5SDimitry Andric // which may not be precise, since optimizeUses is capped. The result is 1530b57cec5SDimitry Andric // correct, but we may not get as "far up" as possible to get which access is 1540b57cec5SDimitry Andric // clobbering the one queried. 1550b57cec5SDimitry Andric cl::opt<unsigned> llvm::SetLicmMssaOptCap( 1560b57cec5SDimitry Andric "licm-mssa-optimization-cap", cl::init(100), cl::Hidden, 1570b57cec5SDimitry Andric cl::desc("Enable imprecision in LICM in pathological cases, in exchange " 1580b57cec5SDimitry Andric "for faster compile. Caps the MemorySSA clobbering calls.")); 1590b57cec5SDimitry Andric 1600b57cec5SDimitry Andric // Experimentally, memory promotion carries less importance than sinking and 1610b57cec5SDimitry Andric // hoisting. Limit when we do promotion when using MemorySSA, in order to save 1620b57cec5SDimitry Andric // compile time. 1630b57cec5SDimitry Andric cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap( 1640b57cec5SDimitry Andric "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden, 1650b57cec5SDimitry Andric cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no " 1660b57cec5SDimitry Andric "effect. 
When MSSA in LICM is enabled, then this is the maximum " 1670b57cec5SDimitry Andric "number of accesses allowed to be present in a loop in order to " 1680b57cec5SDimitry Andric "enable memory promotion.")); 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); 17106c3fb27SDimitry Andric static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop, 1720b57cec5SDimitry Andric const LoopSafetyInfo *SafetyInfo, 17306c3fb27SDimitry Andric TargetTransformInfo *TTI, 17406c3fb27SDimitry Andric bool &FoldableInLoop, bool LoopNestMode); 1750b57cec5SDimitry Andric static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, 1760b57cec5SDimitry Andric BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, 17781ad6265SDimitry Andric MemorySSAUpdater &MSSAU, ScalarEvolution *SE, 178480093f4SDimitry Andric OptimizationRemarkEmitter *ORE); 1790b57cec5SDimitry Andric static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, 180bdd1243dSDimitry Andric const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, 181bdd1243dSDimitry Andric MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE); 182fb03ea46SDimitry Andric static bool isSafeToExecuteUnconditionally( 183fb03ea46SDimitry Andric Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, 184fb03ea46SDimitry Andric const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, 185fb03ea46SDimitry Andric OptimizationRemarkEmitter *ORE, const Instruction *CtxI, 186bdd1243dSDimitry Andric AssumptionCache *AC, bool AllowSpeculation); 18781ad6265SDimitry Andric static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU, 188e8d8bef9SDimitry Andric Loop *CurLoop, Instruction &I, 18906c3fb27SDimitry Andric SinkAndHoistLICMFlags &Flags, 19006c3fb27SDimitry Andric bool InvariantGroup); 19181ad6265SDimitry Andric static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, 192e8d8bef9SDimitry Andric MemoryUse 
&MU); 19306c3fb27SDimitry Andric /// Aggregates various functions for hoisting computations out of loop. 19406c3fb27SDimitry Andric static bool hoistArithmetics(Instruction &I, Loop &L, 19506c3fb27SDimitry Andric ICFLoopSafetyInfo &SafetyInfo, 19606c3fb27SDimitry Andric MemorySSAUpdater &MSSAU, AssumptionCache *AC, 19706c3fb27SDimitry Andric DominatorTree *DT); 1985ffd83dbSDimitry Andric static Instruction *cloneInstructionInExitBlock( 1990b57cec5SDimitry Andric Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, 20081ad6265SDimitry Andric const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU); 2010b57cec5SDimitry Andric 2020b57cec5SDimitry Andric static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, 20381ad6265SDimitry Andric MemorySSAUpdater &MSSAU); 2040b57cec5SDimitry Andric 2055f757f3fSDimitry Andric static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest, 2060b57cec5SDimitry Andric ICFLoopSafetyInfo &SafetyInfo, 20781ad6265SDimitry Andric MemorySSAUpdater &MSSAU, ScalarEvolution *SE); 2080b57cec5SDimitry Andric 209fe6060f1SDimitry Andric static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L, 210fe6060f1SDimitry Andric function_ref<void(Instruction *)> Fn); 211bdd1243dSDimitry Andric using PointersAndHasReadsOutsideSet = 212bdd1243dSDimitry Andric std::pair<SmallSetVector<Value *, 8>, bool>; 213bdd1243dSDimitry Andric static SmallVector<PointersAndHasReadsOutsideSet, 0> 214fe6060f1SDimitry Andric collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L); 215fe6060f1SDimitry Andric 2160b57cec5SDimitry Andric namespace { 2170b57cec5SDimitry Andric struct LoopInvariantCodeMotion { 2185ffd83dbSDimitry Andric bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT, 219bdd1243dSDimitry Andric AssumptionCache *AC, TargetLibraryInfo *TLI, 220e8d8bef9SDimitry Andric TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA, 221fe6060f1SDimitry Andric 
OptimizationRemarkEmitter *ORE, bool LoopNestMode = false); 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric LoopInvariantCodeMotion(unsigned LicmMssaOptCap, 224fb03ea46SDimitry Andric unsigned LicmMssaNoAccForPromotionCap, 225fb03ea46SDimitry Andric bool LicmAllowSpeculation) 2260b57cec5SDimitry Andric : LicmMssaOptCap(LicmMssaOptCap), 227fb03ea46SDimitry Andric LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap), 228fb03ea46SDimitry Andric LicmAllowSpeculation(LicmAllowSpeculation) {} 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric private: 2310b57cec5SDimitry Andric unsigned LicmMssaOptCap; 2320b57cec5SDimitry Andric unsigned LicmMssaNoAccForPromotionCap; 233fb03ea46SDimitry Andric bool LicmAllowSpeculation; 2340b57cec5SDimitry Andric }; 2350b57cec5SDimitry Andric 2360b57cec5SDimitry Andric struct LegacyLICMPass : public LoopPass { 2370b57cec5SDimitry Andric static char ID; // Pass identification, replacement for typeid 2380b57cec5SDimitry Andric LegacyLICMPass( 2390b57cec5SDimitry Andric unsigned LicmMssaOptCap = SetLicmMssaOptCap, 240fb03ea46SDimitry Andric unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap, 241fb03ea46SDimitry Andric bool LicmAllowSpeculation = true) 242fb03ea46SDimitry Andric : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, 243fb03ea46SDimitry Andric LicmAllowSpeculation) { 2440b57cec5SDimitry Andric initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry()); 2450b57cec5SDimitry Andric } 2460b57cec5SDimitry Andric 2470b57cec5SDimitry Andric bool runOnLoop(Loop *L, LPPassManager &LPM) override { 2485ffd83dbSDimitry Andric if (skipLoop(L)) 2490b57cec5SDimitry Andric return false; 2500b57cec5SDimitry Andric 251e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block " 252e8d8bef9SDimitry Andric << L->getHeader()->getNameOrAsOperand() << "\n"); 253e8d8bef9SDimitry Andric 254bdd1243dSDimitry Andric Function *F = L->getHeader()->getParent(); 
// NOTE(review): git-blame metadata is interleaved above and original line
// breaks are lost; the text is reproduced verbatim (not compilable as-is).
// This span covers the remaining static helper declarations, the
// LoopInvariantCodeMotion driver struct (which stores the two MemorySSA caps
// and the speculation flag passed to its constructor), and the first half of
// the legacy-PM wrapper LegacyLICMPass.
// NOTE(review): git-blame metadata is interleaved below and original line
// breaks are lost; the text is reproduced verbatim (not compilable as-is).
// This span finishes LegacyLICMPass — the analysis lookups that feed
// LICM.runOnLoop, and getAnalysisUsage's required/preserved sets (TLI, MSSA,
// TTI, AssumptionCache, lazy BFI/BPI) — and begins the new-PM LICMPass::run,
// which reports a fatal error unless MemorySSA was provided (loop-mssa).
255bdd1243dSDimitry Andric 2560b57cec5SDimitry Andric auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); 257349cc55cSDimitry Andric MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); 2580b57cec5SDimitry Andric // For the old PM, we can't use OptimizationRemarkEmitter as an analysis 2590b57cec5SDimitry Andric // pass. Function analyses need to be preserved across loop transformations 2600b57cec5SDimitry Andric // but ORE cannot be preserved (see comment before the pass definition). 2610b57cec5SDimitry Andric OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); 262e8d8bef9SDimitry Andric return LICM.runOnLoop( 263e8d8bef9SDimitry Andric L, &getAnalysis<AAResultsWrapperPass>().getAAResults(), 2640b57cec5SDimitry Andric &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), 265bdd1243dSDimitry Andric &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), 266bdd1243dSDimitry Andric &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F), 267bdd1243dSDimitry Andric &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(*F), 268bdd1243dSDimitry Andric &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F), 2695ffd83dbSDimitry Andric SE ? &SE->getSE() : nullptr, MSSA, &ORE); 2700b57cec5SDimitry Andric } 2710b57cec5SDimitry Andric 2720b57cec5SDimitry Andric /// This transformation requires natural loop information & requires that 2730b57cec5SDimitry Andric /// loop preheaders be inserted into the CFG... 
2740b57cec5SDimitry Andric /// 2750b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 2760b57cec5SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>(); 2770b57cec5SDimitry Andric AU.addPreserved<LoopInfoWrapperPass>(); 2780b57cec5SDimitry Andric AU.addRequired<TargetLibraryInfoWrapperPass>(); 2790b57cec5SDimitry Andric AU.addRequired<MemorySSAWrapperPass>(); 2800b57cec5SDimitry Andric AU.addPreserved<MemorySSAWrapperPass>(); 2810b57cec5SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 282bdd1243dSDimitry Andric AU.addRequired<AssumptionCacheTracker>(); 2830b57cec5SDimitry Andric getLoopAnalysisUsage(AU); 284e8d8bef9SDimitry Andric LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); 285e8d8bef9SDimitry Andric AU.addPreserved<LazyBlockFrequencyInfoPass>(); 286e8d8bef9SDimitry Andric AU.addPreserved<LazyBranchProbabilityInfoPass>(); 2870b57cec5SDimitry Andric } 2880b57cec5SDimitry Andric 2890b57cec5SDimitry Andric private: 2900b57cec5SDimitry Andric LoopInvariantCodeMotion LICM; 2910b57cec5SDimitry Andric }; 2920b57cec5SDimitry Andric } // namespace 2930b57cec5SDimitry Andric 2940b57cec5SDimitry Andric PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, 2950b57cec5SDimitry Andric LoopStandardAnalysisResults &AR, LPMUpdater &) { 296349cc55cSDimitry Andric if (!AR.MSSA) 297bdd1243dSDimitry Andric report_fatal_error("LICM requires MemorySSA (loop-mssa)", 298bdd1243dSDimitry Andric /*GenCrashDiag*/false); 299349cc55cSDimitry Andric 3005ffd83dbSDimitry Andric // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis 3015ffd83dbSDimitry Andric // pass. Function analyses need to be preserved across loop transformations 3025ffd83dbSDimitry Andric // but ORE cannot be preserved (see comment before the pass definition). 
// NOTE(review): git-blame metadata is interleaved below and original line
// breaks are lost; the text is reproduced verbatim (not compilable as-is).
// This span holds: the tail of LICMPass::run (preserves MemorySSAAnalysis when
// the loop changed), LICMPass::printPipeline and LNICMPass::printPipeline
// (emit "<[no-]allowspeculation>"), LNICMPass::run (runs LICM on the
// outermost loop of the nest with LoopNestMode = true), the legacy pass
// registration macros and createLICMPass, and the beginning of the two
// SinkAndHoistLICMFlags constructors.
3035ffd83dbSDimitry Andric OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); 3040b57cec5SDimitry Andric 30581ad6265SDimitry Andric LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap, 30681ad6265SDimitry Andric Opts.AllowSpeculation); 307bdd1243dSDimitry Andric if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.AC, &AR.TLI, &AR.TTI, 308e8d8bef9SDimitry Andric &AR.SE, AR.MSSA, &ORE)) 3090b57cec5SDimitry Andric return PreservedAnalyses::all(); 3100b57cec5SDimitry Andric 3110b57cec5SDimitry Andric auto PA = getLoopPassPreservedAnalyses(); 3120b57cec5SDimitry Andric PA.preserve<MemorySSAAnalysis>(); 3130b57cec5SDimitry Andric 3140b57cec5SDimitry Andric return PA; 3150b57cec5SDimitry Andric } 3160b57cec5SDimitry Andric 31781ad6265SDimitry Andric void LICMPass::printPipeline( 31881ad6265SDimitry Andric raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { 31981ad6265SDimitry Andric static_cast<PassInfoMixin<LICMPass> *>(this)->printPipeline( 32081ad6265SDimitry Andric OS, MapClassName2PassName); 32181ad6265SDimitry Andric 32206c3fb27SDimitry Andric OS << '<'; 32381ad6265SDimitry Andric OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation"; 32406c3fb27SDimitry Andric OS << '>'; 32581ad6265SDimitry Andric } 32681ad6265SDimitry Andric 327fe6060f1SDimitry Andric PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM, 328fe6060f1SDimitry Andric LoopStandardAnalysisResults &AR, 329fe6060f1SDimitry Andric LPMUpdater &) { 330349cc55cSDimitry Andric if (!AR.MSSA) 331bdd1243dSDimitry Andric report_fatal_error("LNICM requires MemorySSA (loop-mssa)", 332bdd1243dSDimitry Andric /*GenCrashDiag*/false); 333349cc55cSDimitry Andric 334fe6060f1SDimitry Andric // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis 335fe6060f1SDimitry Andric // pass. 
Function analyses need to be preserved across loop transformations 336fe6060f1SDimitry Andric // but ORE cannot be preserved (see comment before the pass definition). 337fe6060f1SDimitry Andric OptimizationRemarkEmitter ORE(LN.getParent()); 338fe6060f1SDimitry Andric 33981ad6265SDimitry Andric LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap, 34081ad6265SDimitry Andric Opts.AllowSpeculation); 341fe6060f1SDimitry Andric 342fe6060f1SDimitry Andric Loop &OutermostLoop = LN.getOutermostLoop(); 343bdd1243dSDimitry Andric bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, &AR.AC, 344fe6060f1SDimitry Andric &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE, true); 345fe6060f1SDimitry Andric 346fe6060f1SDimitry Andric if (!Changed) 347fe6060f1SDimitry Andric return PreservedAnalyses::all(); 348fe6060f1SDimitry Andric 349fe6060f1SDimitry Andric auto PA = getLoopPassPreservedAnalyses(); 350fe6060f1SDimitry Andric 351fe6060f1SDimitry Andric PA.preserve<DominatorTreeAnalysis>(); 352fe6060f1SDimitry Andric PA.preserve<LoopAnalysis>(); 353fe6060f1SDimitry Andric PA.preserve<MemorySSAAnalysis>(); 354fe6060f1SDimitry Andric 355fe6060f1SDimitry Andric return PA; 356fe6060f1SDimitry Andric } 357fe6060f1SDimitry Andric 35881ad6265SDimitry Andric void LNICMPass::printPipeline( 35981ad6265SDimitry Andric raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { 36081ad6265SDimitry Andric static_cast<PassInfoMixin<LNICMPass> *>(this)->printPipeline( 36181ad6265SDimitry Andric OS, MapClassName2PassName); 36281ad6265SDimitry Andric 36306c3fb27SDimitry Andric OS << '<'; 36481ad6265SDimitry Andric OS << (Opts.AllowSpeculation ? 
"" : "no-") << "allowspeculation"; 36506c3fb27SDimitry Andric OS << '>'; 36681ad6265SDimitry Andric } 36781ad6265SDimitry Andric 3680b57cec5SDimitry Andric char LegacyLICMPass::ID = 0; 3690b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", 3700b57cec5SDimitry Andric false, false) 3710b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopPass) 3720b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 3730b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 3740b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) 375e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LazyBFIPass) 3760b57cec5SDimitry Andric INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, 3770b57cec5SDimitry Andric false) 3780b57cec5SDimitry Andric 3790b57cec5SDimitry Andric Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } 3800b57cec5SDimitry Andric 38106c3fb27SDimitry Andric llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop &L, 38206c3fb27SDimitry Andric MemorySSA &MSSA) 383e8d8bef9SDimitry Andric : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap, 384e8d8bef9SDimitry Andric IsSink, L, MSSA) {} 385e8d8bef9SDimitry Andric 386e8d8bef9SDimitry Andric llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags( 387e8d8bef9SDimitry Andric unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink, 38806c3fb27SDimitry Andric Loop &L, MemorySSA &MSSA) 389e8d8bef9SDimitry Andric : LicmMssaOptCap(LicmMssaOptCap), 390e8d8bef9SDimitry Andric LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap), 391e8d8bef9SDimitry Andric IsSink(IsSink) { 392e8d8bef9SDimitry Andric unsigned AccessCapCount = 0; 39306c3fb27SDimitry Andric for (auto *BB : L.getBlocks()) 39406c3fb27SDimitry Andric if (const auto *Accesses = MSSA.getBlockAccesses(BB)) 395e8d8bef9SDimitry Andric for (const auto &MA : *Accesses) { 
// NOTE(review): git-blame metadata is interleaved below and original line
// breaks are lost; the text is reproduced verbatim (not compilable as-is).
// This span holds the tail of the SinkAndHoistLICMFlags constructor (sets
// NoOfMemAccTooLarge once the loop's MemorySSA access count exceeds
// LicmMssaNoAccForPromotionCap) and the start of
// LoopInvariantCodeMotion::runOnLoop: LCSSA assertion, llvm.licm.disable
// early-exit, coroutine-suspend detection, sinkRegion/hoistRegion driving,
// and the scalar-promotion setup. The function body continues past the end
// of this chunk, so nothing is asserted here about its unseen tail.
396e8d8bef9SDimitry Andric (void)MA; 397e8d8bef9SDimitry Andric ++AccessCapCount; 398e8d8bef9SDimitry Andric if (AccessCapCount > LicmMssaNoAccForPromotionCap) { 399e8d8bef9SDimitry Andric NoOfMemAccTooLarge = true; 400e8d8bef9SDimitry Andric return; 401e8d8bef9SDimitry Andric } 402e8d8bef9SDimitry Andric } 403e8d8bef9SDimitry Andric } 404e8d8bef9SDimitry Andric 4050b57cec5SDimitry Andric /// Hoist expressions out of the specified loop. Note, alias info for inner 4060b57cec5SDimitry Andric /// loop is not preserved so it is not a good idea to run LICM multiple 4070b57cec5SDimitry Andric /// times on one loop. 408bdd1243dSDimitry Andric bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, 409bdd1243dSDimitry Andric DominatorTree *DT, AssumptionCache *AC, 410bdd1243dSDimitry Andric TargetLibraryInfo *TLI, 411bdd1243dSDimitry Andric TargetTransformInfo *TTI, 412bdd1243dSDimitry Andric ScalarEvolution *SE, MemorySSA *MSSA, 413bdd1243dSDimitry Andric OptimizationRemarkEmitter *ORE, 414fe6060f1SDimitry Andric bool LoopNestMode) { 4150b57cec5SDimitry Andric bool Changed = false; 4160b57cec5SDimitry Andric 4170b57cec5SDimitry Andric assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); 4180b57cec5SDimitry Andric 4198bcb0991SDimitry Andric // If this loop has metadata indicating that LICM is not to be performed then 4208bcb0991SDimitry Andric // just exit. 4218bcb0991SDimitry Andric if (hasDisableLICMTransformsHint(L)) { 4228bcb0991SDimitry Andric return false; 4238bcb0991SDimitry Andric } 4248bcb0991SDimitry Andric 425fe6060f1SDimitry Andric // Don't sink stores from loops with coroutine suspend instructions. 426fe6060f1SDimitry Andric // LICM would sink instructions into the default destination of 427fe6060f1SDimitry Andric // the coroutine switch. 
The default destination of the switch is to 428fe6060f1SDimitry Andric // handle the case where the coroutine is suspended, by which point the 429fe6060f1SDimitry Andric // coroutine frame may have been destroyed. No instruction can be sunk there. 430fe6060f1SDimitry Andric // FIXME: This would unfortunately hurt the performance of coroutines, however 431fe6060f1SDimitry Andric // there is currently no general solution for this. Similar issues could also 432fe6060f1SDimitry Andric // potentially happen in other passes where instructions are being moved 433fe6060f1SDimitry Andric // across that edge. 434fe6060f1SDimitry Andric bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) { 435fe6060f1SDimitry Andric return llvm::any_of(*BB, [](Instruction &I) { 436fe6060f1SDimitry Andric IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I); 437fe6060f1SDimitry Andric return II && II->getIntrinsicID() == Intrinsic::coro_suspend; 438fe6060f1SDimitry Andric }); 439fe6060f1SDimitry Andric }); 440fe6060f1SDimitry Andric 441349cc55cSDimitry Andric MemorySSAUpdater MSSAU(MSSA); 442349cc55cSDimitry Andric SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, 44306c3fb27SDimitry Andric /*IsSink=*/true, *L, *MSSA); 4440b57cec5SDimitry Andric 4450b57cec5SDimitry Andric // Get the preheader block to move instructions into... 4460b57cec5SDimitry Andric BasicBlock *Preheader = L->getLoopPreheader(); 4470b57cec5SDimitry Andric 4480b57cec5SDimitry Andric // Compute loop safety information. 4490b57cec5SDimitry Andric ICFLoopSafetyInfo SafetyInfo; 4500b57cec5SDimitry Andric SafetyInfo.computeLoopSafetyInfo(L); 4510b57cec5SDimitry Andric 4520b57cec5SDimitry Andric // We want to visit all of the instructions in this loop... 
that are not parts 4530b57cec5SDimitry Andric // of our subloops (they have already had their invariants hoisted out of 4540b57cec5SDimitry Andric // their loop, into this loop, so there is no need to process the BODIES of 4550b57cec5SDimitry Andric // the subloops). 4560b57cec5SDimitry Andric // 4570b57cec5SDimitry Andric // Traverse the body of the loop in depth first order on the dominator tree so 4580b57cec5SDimitry Andric // that we are guaranteed to see definitions before we see uses. This allows 4590b57cec5SDimitry Andric // us to sink instructions in one pass, without iteration. After sinking 4600b57cec5SDimitry Andric // instructions, we perform another pass to hoist them out of the loop. 4610b57cec5SDimitry Andric if (L->hasDedicatedExits()) 462bdd1243dSDimitry Andric Changed |= 463bdd1243dSDimitry Andric LoopNestMode 464bdd1243dSDimitry Andric ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, DT, 465bdd1243dSDimitry Andric TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE) 466bdd1243dSDimitry Andric : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, 467bdd1243dSDimitry Andric MSSAU, &SafetyInfo, Flags, ORE); 468349cc55cSDimitry Andric Flags.setIsSink(false); 469e8d8bef9SDimitry Andric if (Preheader) 470bdd1243dSDimitry Andric Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L, 47181ad6265SDimitry Andric MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode, 472fb03ea46SDimitry Andric LicmAllowSpeculation); 4730b57cec5SDimitry Andric 4740b57cec5SDimitry Andric // Now that all loop invariants have been removed from the loop, promote any 4750b57cec5SDimitry Andric // memory references to scalars that we can. 4760b57cec5SDimitry Andric // Don't sink stores from loops without dedicated block exits. Exits 4770b57cec5SDimitry Andric // containing indirect branches are not transformed by loop simplify, 4780b57cec5SDimitry Andric // make sure we catch that. 
An additional load may be generated in the 4790b57cec5SDimitry Andric // preheader for SSA updater, so also avoid sinking when no preheader 4800b57cec5SDimitry Andric // is available. 4810b57cec5SDimitry Andric if (!DisablePromotion && Preheader && L->hasDedicatedExits() && 482349cc55cSDimitry Andric !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) { 4830b57cec5SDimitry Andric // Figure out the loop exits and their insertion points 4840b57cec5SDimitry Andric SmallVector<BasicBlock *, 8> ExitBlocks; 4850b57cec5SDimitry Andric L->getUniqueExitBlocks(ExitBlocks); 4860b57cec5SDimitry Andric 4870b57cec5SDimitry Andric // We can't insert into a catchswitch. 4880b57cec5SDimitry Andric bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) { 4890b57cec5SDimitry Andric return isa<CatchSwitchInst>(Exit->getTerminator()); 4900b57cec5SDimitry Andric }); 4910b57cec5SDimitry Andric 4920b57cec5SDimitry Andric if (!HasCatchSwitch) { 4935f757f3fSDimitry Andric SmallVector<BasicBlock::iterator, 8> InsertPts; 4940b57cec5SDimitry Andric SmallVector<MemoryAccess *, 8> MSSAInsertPts; 4950b57cec5SDimitry Andric InsertPts.reserve(ExitBlocks.size()); 4960b57cec5SDimitry Andric MSSAInsertPts.reserve(ExitBlocks.size()); 4970b57cec5SDimitry Andric for (BasicBlock *ExitBlock : ExitBlocks) { 4985f757f3fSDimitry Andric InsertPts.push_back(ExitBlock->getFirstInsertionPt()); 4990b57cec5SDimitry Andric MSSAInsertPts.push_back(nullptr); 5000b57cec5SDimitry Andric } 5010b57cec5SDimitry Andric 5020b57cec5SDimitry Andric PredIteratorCache PIC; 5030b57cec5SDimitry Andric 504fe6060f1SDimitry Andric // Promoting one set of accesses may make the pointers for another set 50581ad6265SDimitry Andric // loop invariant, so run this in a loop. 
// NOTE(review): the do/while below repeats promotion until no candidate set
// is promoted in a round, then reforms LCSSA recursively if anything was
// promoted. Verbatim continuation; the function body is truncated at the end
// of this chunk.
506349cc55cSDimitry Andric bool Promoted = false; 507fe6060f1SDimitry Andric bool LocalPromoted; 508fe6060f1SDimitry Andric do { 509fe6060f1SDimitry Andric LocalPromoted = false; 510bdd1243dSDimitry Andric for (auto [PointerMustAliases, HasReadsOutsideSet] : 511fe6060f1SDimitry Andric collectPromotionCandidates(MSSA, AA, L)) { 512fe6060f1SDimitry Andric LocalPromoted |= promoteLoopAccessesToScalars( 513fb03ea46SDimitry Andric PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, 514bdd1243dSDimitry Andric DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE, 515bdd1243dSDimitry Andric LicmAllowSpeculation, HasReadsOutsideSet); 516fe6060f1SDimitry Andric } 517fe6060f1SDimitry Andric Promoted |= LocalPromoted; 518fe6060f1SDimitry Andric } while (LocalPromoted); 5190b57cec5SDimitry Andric 5200b57cec5SDimitry Andric // Once we have promoted values across the loop body we have to 5210b57cec5SDimitry Andric // recursively reform LCSSA as any nested loop may now have values defined 5220b57cec5SDimitry Andric // within the loop used in the outer loop. 5230b57cec5SDimitry Andric // FIXME: This is really heavy handed. It would be a bit better to use an 5240b57cec5SDimitry Andric // SSAUpdater strategy during promotion that was LCSSA aware and reformed 5250b57cec5SDimitry Andric // it as it went. 5260b57cec5SDimitry Andric if (Promoted) 5270b57cec5SDimitry Andric formLCSSARecursively(*L, *DT, LI, SE); 5280b57cec5SDimitry Andric 5290b57cec5SDimitry Andric Changed |= Promoted; 5300b57cec5SDimitry Andric } 5310b57cec5SDimitry Andric } 5320b57cec5SDimitry Andric 5330b57cec5SDimitry Andric // Check that neither this loop nor its parent have had LCSSA broken. LICM is 5340b57cec5SDimitry Andric // specifically moving instructions across the loop boundary and so it is 5354824e7fdSDimitry Andric // especially in need of basic functional correctness checking here. 
  assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
  assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
         "Parent loop not left in LCSSA form after LICM!");

  if (VerifyMemorySSA)
    MSSA->verifyMemorySSA();

  if (Changed && SE)
    SE->forgetLoopDispositions();
  return Changed;
}

/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in reverse depth
/// first order w.r.t the DominatorTree. This allows us to visit uses before
/// definitions, allowing us to sink a loop body in one pass without iteration.
///
/// When \p OutermostLoop is non-null, sinking is being performed for a whole
/// loop nest and the used-in-loop checks are done against the outermost loop
/// rather than \p CurLoop.
bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
                      DominatorTree *DT, TargetLibraryInfo *TLI,
                      TargetTransformInfo *TTI, Loop *CurLoop,
                      MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
                      SinkAndHoistLICMFlags &Flags,
                      OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {

  // Verify inputs.
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
         CurLoop != nullptr && SafetyInfo != nullptr &&
         "Unexpected input to sinkRegion.");

  // We want to visit children before parents. We will enqueue all the parents
  // before their children in the worklist and process the worklist in reverse
  // order.
  SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);

  bool Changed = false;
  for (DomTreeNode *DTN : reverse(Worklist)) {
    BasicBlock *BB = DTN->getBlock();
    // Only need to process the contents of this block if it is not part of a
    // subloop (which would already have been processed).
    if (inSubLoop(BB, CurLoop, LI))
      continue;

    // Walk the block bottom-up so that uses are visited before definitions,
    // matching the reverse dominator-tree order of the outer loop.
    for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
      Instruction &I = *--II;

      // The instruction is not used in the loop if it is dead. In this case,
      // we just delete it instead of sinking it.
      if (isInstructionTriviallyDead(&I, TLI)) {
        LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
        salvageKnowledge(&I);
        salvageDebugInfo(I);
        // Advance the iterator past I before erasing it so it stays valid.
        ++II;
        eraseInstruction(I, *SafetyInfo, MSSAU);
        Changed = true;
        continue;
      }

      // Check to see if we can sink this instruction to the exit blocks
      // of the loop. We can do this if the all users of the instruction are
      // outside of the loop. In this case, it doesn't even matter if the
      // operands of the instruction are loop invariant.
      //
      bool FoldableInLoop = false;
      bool LoopNestMode = OutermostLoop != nullptr;
      if (!I.mayHaveSideEffects() &&
          isNotUsedOrFoldableInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
                                    SafetyInfo, TTI, FoldableInLoop,
                                    LoopNestMode) &&
          canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) {
        if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
          // The original is erased only when it was actually sunk (not merely
          // foldable at its uses); again step the iterator before erasing.
          if (!FoldableInLoop) {
            ++II;
            salvageDebugInfo(I);
            eraseInstruction(I, *SafetyInfo, MSSAU);
          }
          Changed = true;
        }
      }
    }
  }
  if (VerifyMemorySSA)
    MSSAU.getMemorySSA()->verifyMemorySSA();
  return Changed;
}

/// Sink instructions for every loop of the loop nest rooted at \p CurLoop by
/// running sinkRegion on each member loop, passing \p CurLoop as the
/// outermost loop. A single SafetyInfo/Flags/MSSAU is shared across the nest.
bool llvm::sinkRegionForLoopNest(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
                                 DominatorTree *DT, TargetLibraryInfo *TLI,
                                 TargetTransformInfo *TTI, Loop *CurLoop,
                                 MemorySSAUpdater &MSSAU,
                                 ICFLoopSafetyInfo *SafetyInfo,
                                 SinkAndHoistLICMFlags &Flags,
                                 OptimizationRemarkEmitter *ORE) {

  bool Changed = false;
  SmallPriorityWorklist<Loop *, 4> Worklist;
  Worklist.insert(CurLoop);
  appendLoopsToWorklist(*CurLoop, Worklist);
  while (!Worklist.empty()) {
    Loop *L = Worklist.pop_back_val();
    Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
                          MSSAU, SafetyInfo, Flags, ORE, CurLoop);
  }
  return Changed;
}

namespace {
// This is a helper class for hoistRegion to make it able to hoist control flow
// in order to be able to hoist phis. The way this works is that we initially
// start hoisting to the loop preheader, and when we see a loop invariant branch
// we make note of this. When we then come to hoist an instruction that's
// conditional on such a branch we duplicate the branch and the relevant control
// flow, then hoist the instruction into the block corresponding to its original
// block in the duplicated control flow.
class ControlFlowHoister {
private:
  // Information about the loop we are hoisting from
  LoopInfo *LI;
  DominatorTree *DT;
  Loop *CurLoop;
  MemorySSAUpdater &MSSAU;

  // A map of blocks in the loop to the block their instructions will be hoisted
  // to.
  DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;

  // The branches that we can hoist, mapped to the block that marks a
  // convergence point of their control flow.
  DenseMap<BranchInst *, BasicBlock *> HoistableBranches;

public:
  ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
                     MemorySSAUpdater &MSSAU)
      : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}

  /// Record \p BI in HoistableBranches if it is a conditional, loop-invariant
  /// branch whose two in-loop destinations converge to a single block that the
  /// branch dominates.
  void registerPossiblyHoistableBranch(BranchInst *BI) {
    // We can only hoist conditional branches with loop invariant operands.
    if (!ControlFlowHoisting || !BI->isConditional() ||
        !CurLoop->hasLoopInvariantOperands(BI))
      return;

    // The branch destinations need to be in the loop, and we don't gain
    // anything by duplicating conditional branches with duplicate successors,
    // as it's essentially the same as an unconditional branch.
    BasicBlock *TrueDest = BI->getSuccessor(0);
    BasicBlock *FalseDest = BI->getSuccessor(1);
    if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
        TrueDest == FalseDest)
      return;

    // We can hoist BI if one branch destination is the successor of the other,
    // or both have common successor which we check by seeing if the
    // intersection of their successors is non-empty.
    // TODO: This could be expanded to allowing branches where both ends
    // eventually converge to a single block.
    SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
    TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
    FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
    BasicBlock *CommonSucc = nullptr;
    if (TrueDestSucc.count(FalseDest)) {
      CommonSucc = FalseDest;
    } else if (FalseDestSucc.count(TrueDest)) {
      CommonSucc = TrueDest;
    } else {
      set_intersect(TrueDestSucc, FalseDestSucc);
      // If there's one common successor use that.
      if (TrueDestSucc.size() == 1)
        CommonSucc = *TrueDestSucc.begin();
      // If there's more than one pick whichever appears first in the block list
      // (we can't use the value returned by TrueDestSucc.begin() as it's
      // unpredicatable which element gets returned).
      else if (!TrueDestSucc.empty()) {
        Function *F = TrueDest->getParent();
        auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
        auto It = llvm::find_if(*F, IsSucc);
        assert(It != F->end() && "Could not find successor in function");
        CommonSucc = &*It;
      }
    }
    // The common successor has to be dominated by the branch, as otherwise
    // there will be some other path to the successor that will not be
    // controlled by this branch so any phi we hoist would be controlled by the
    // wrong condition. This also takes care of avoiding hoisting of loop back
    // edges.
    // TODO: In some cases this could be relaxed if the successor is dominated
    // by another block that's been hoisted and we can guarantee that the
    // control flow has been replicated exactly.
    if (CommonSucc && DT->dominates(BI, CommonSucc))
      HoistableBranches[BI] = CommonSucc;
  }

  /// Return true if \p PN can be hoisted: its operands are loop invariant and
  /// every predecessor of its block is accounted for by a registered
  /// hoistable branch, so the phi can be recreated in the duplicated control
  /// flow.
  bool canHoistPHI(PHINode *PN) {
    // The phi must have loop invariant operands.
    if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
      return false;
    // We can hoist phis if the block they are in is the target of hoistable
    // branches which cover all of the predecessors of the block.
    SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
    BasicBlock *BB = PN->getParent();
    for (BasicBlock *PredBB : predecessors(BB))
      PredecessorBlocks.insert(PredBB);
    // If we have less predecessor blocks than predecessors then the phi will
    // have more than one incoming value for the same block which we can't
    // handle.
    // TODO: This could be handled be erasing some of the duplicate incoming
    // values.
    if (PredecessorBlocks.size() != pred_size(BB))
      return false;
    for (auto &Pair : HoistableBranches) {
      if (Pair.second == BB) {
        // Which blocks are predecessors via this branch depends on if the
        // branch is triangle-like or diamond-like.
        if (Pair.first->getSuccessor(0) == BB) {
          PredecessorBlocks.erase(Pair.first->getParent());
          PredecessorBlocks.erase(Pair.first->getSuccessor(1));
        } else if (Pair.first->getSuccessor(1) == BB) {
          PredecessorBlocks.erase(Pair.first->getParent());
          PredecessorBlocks.erase(Pair.first->getSuccessor(0));
        } else {
          PredecessorBlocks.erase(Pair.first->getSuccessor(0));
          PredecessorBlocks.erase(Pair.first->getSuccessor(1));
        }
      }
    }
    // PredecessorBlocks will now be empty if for every predecessor of BB we
    // found a hoistable branch source.
    return PredecessorBlocks.empty();
  }

  /// Return the block that instructions from \p BB should be hoisted to,
  /// creating the duplicated control flow (cloned branch plus ".licm" blocks)
  /// on demand and keeping the dominator tree, LoopInfo and MemorySSA in sync.
  BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
    if (!ControlFlowHoisting)
      return CurLoop->getLoopPreheader();
    // If BB has already been hoisted, return that
    if (HoistDestinationMap.count(BB))
      return HoistDestinationMap[BB];

    // Check if this block is conditional based on a pending branch
    auto HasBBAsSuccessor =
        [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
          return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
                                       Pair.first->getSuccessor(1) == BB);
        };
    auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor);

    // If not involved in a pending branch, hoist to preheader
    BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
    if (It == HoistableBranches.end()) {
      LLVM_DEBUG(dbgs() << "LICM using "
                        << InitialPreheader->getNameOrAsOperand()
                        << " as hoist destination for "
                        << BB->getNameOrAsOperand() << "\n");
      HoistDestinationMap[BB] = InitialPreheader;
      return InitialPreheader;
    }
    BranchInst *BI = It->first;
    assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
               HoistableBranches.end() &&
           "BB is expected to be the target of at most one branch");

    LLVMContext &C = BB->getContext();
    BasicBlock *TrueDest = BI->getSuccessor(0);
    BasicBlock *FalseDest = BI->getSuccessor(1);
    BasicBlock *CommonSucc = HoistableBranches[BI];
    // Recurse so the branch's own block gets a hoist destination first.
    BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());

    // Create hoisted versions of blocks that currently don't have them
    auto CreateHoistedBlock = [&](BasicBlock *Orig) {
      if (HoistDestinationMap.count(Orig))
        return HoistDestinationMap[Orig];
      BasicBlock *New =
          BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
      HoistDestinationMap[Orig] = New;
      DT->addNewBlock(New, HoistTarget);
      if (CurLoop->getParentLoop())
        CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
      ++NumCreatedBlocks;
      LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
                        << " as hoist destination for " << Orig->getName()
                        << "\n");
      return New;
    };
    BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
    BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
    BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);

    // Link up these blocks with branches.
    if (!HoistCommonSucc->getTerminator()) {
      // The new common successor we've generated will branch to whatever that
      // hoist target branched to.
      BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
      assert(TargetSucc && "Expected hoist target to have a single successor");
      HoistCommonSucc->moveBefore(TargetSucc);
      BranchInst::Create(TargetSucc, HoistCommonSucc);
    }
    if (!HoistTrueDest->getTerminator()) {
      HoistTrueDest->moveBefore(HoistCommonSucc);
      BranchInst::Create(HoistCommonSucc, HoistTrueDest);
    }
    if (!HoistFalseDest->getTerminator()) {
      HoistFalseDest->moveBefore(HoistCommonSucc);
      BranchInst::Create(HoistCommonSucc, HoistFalseDest);
    }

    // If BI is being cloned to what was originally the preheader then
    // HoistCommonSucc will now be the new preheader.
    if (HoistTarget == InitialPreheader) {
      // Phis in the loop header now need to use the new preheader.
      InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
      MSSAU.wireOldPredecessorsToNewImmediatePredecessor(
          HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
      // The new preheader dominates the loop header.
      DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
      DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
      DT->changeImmediateDominator(HeaderNode, PreheaderNode);
      // The preheader hoist destination is now the new preheader, with the
      // exception of the hoist destination of this branch.
      for (auto &Pair : HoistDestinationMap)
        if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
          Pair.second = HoistCommonSucc;
    }

    // Now finally clone BI.
    ReplaceInstWithInst(
        HoistTarget->getTerminator(),
        BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
    ++NumClonedBranches;

    assert(CurLoop->getLoopPreheader() &&
           "Hoisting blocks should not have destroyed preheader");
    return HoistDestinationMap[BB];
  }
};
} // namespace

/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
8730b57cec5SDimitry Andric /// 8745ffd83dbSDimitry Andric bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, 875bdd1243dSDimitry Andric DominatorTree *DT, AssumptionCache *AC, 876e8d8bef9SDimitry Andric TargetLibraryInfo *TLI, Loop *CurLoop, 87781ad6265SDimitry Andric MemorySSAUpdater &MSSAU, ScalarEvolution *SE, 878349cc55cSDimitry Andric ICFLoopSafetyInfo *SafetyInfo, 8790b57cec5SDimitry Andric SinkAndHoistLICMFlags &Flags, 880fb03ea46SDimitry Andric OptimizationRemarkEmitter *ORE, bool LoopNestMode, 881fb03ea46SDimitry Andric bool AllowSpeculation) { 8820b57cec5SDimitry Andric // Verify inputs. 8830b57cec5SDimitry Andric assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && 88481ad6265SDimitry Andric CurLoop != nullptr && SafetyInfo != nullptr && 8850b57cec5SDimitry Andric "Unexpected input to hoistRegion."); 8860b57cec5SDimitry Andric 8870b57cec5SDimitry Andric ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU); 8880b57cec5SDimitry Andric 8890b57cec5SDimitry Andric // Keep track of instructions that have been hoisted, as they may need to be 8900b57cec5SDimitry Andric // re-hoisted if they end up not dominating all of their uses. 8910b57cec5SDimitry Andric SmallVector<Instruction *, 16> HoistedInstructions; 8920b57cec5SDimitry Andric 8930b57cec5SDimitry Andric // For PHI hoisting to work we need to hoist blocks before their successors. 8940b57cec5SDimitry Andric // We can do this by iterating through the blocks in the loop in reverse 8950b57cec5SDimitry Andric // post-order. 
8960b57cec5SDimitry Andric LoopBlocksRPO Worklist(CurLoop); 8970b57cec5SDimitry Andric Worklist.perform(LI); 8980b57cec5SDimitry Andric bool Changed = false; 89906c3fb27SDimitry Andric BasicBlock *Preheader = CurLoop->getLoopPreheader(); 9000b57cec5SDimitry Andric for (BasicBlock *BB : Worklist) { 9010b57cec5SDimitry Andric // Only need to process the contents of this block if it is not part of a 9020b57cec5SDimitry Andric // subloop (which would already have been processed). 903fe6060f1SDimitry Andric if (!LoopNestMode && inSubLoop(BB, CurLoop, LI)) 9040b57cec5SDimitry Andric continue; 9050b57cec5SDimitry Andric 906349cc55cSDimitry Andric for (Instruction &I : llvm::make_early_inc_range(*BB)) { 9070b57cec5SDimitry Andric // Try hoisting the instruction out to the preheader. We can only do 9080b57cec5SDimitry Andric // this if all of the operands of the instruction are loop invariant and 909e8d8bef9SDimitry Andric // if it is safe to hoist the instruction. We also check block frequency 910e8d8bef9SDimitry Andric // to make sure instruction only gets hoisted into colder blocks. 9110b57cec5SDimitry Andric // TODO: It may be safe to hoist if we are hoisting to a conditional block 9120b57cec5SDimitry Andric // and we have accurately duplicated the control flow from the loop header 9130b57cec5SDimitry Andric // to that block. 
9140b57cec5SDimitry Andric if (CurLoop->hasLoopInvariantOperands(&I) && 91581ad6265SDimitry Andric canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE) && 9160b57cec5SDimitry Andric isSafeToExecuteUnconditionally( 917fe6060f1SDimitry Andric I, DT, TLI, CurLoop, SafetyInfo, ORE, 91806c3fb27SDimitry Andric Preheader->getTerminator(), AC, AllowSpeculation)) { 9190b57cec5SDimitry Andric hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, 920480093f4SDimitry Andric MSSAU, SE, ORE); 9210b57cec5SDimitry Andric HoistedInstructions.push_back(&I); 9220b57cec5SDimitry Andric Changed = true; 9230b57cec5SDimitry Andric continue; 9240b57cec5SDimitry Andric } 9250b57cec5SDimitry Andric 9260b57cec5SDimitry Andric // Attempt to remove floating point division out of the loop by 9270b57cec5SDimitry Andric // converting it to a reciprocal multiplication. 9285ffd83dbSDimitry Andric if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() && 9295ffd83dbSDimitry Andric CurLoop->isLoopInvariant(I.getOperand(1))) { 9300b57cec5SDimitry Andric auto Divisor = I.getOperand(1); 9310b57cec5SDimitry Andric auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0); 9320b57cec5SDimitry Andric auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor); 9330b57cec5SDimitry Andric ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags()); 9340b57cec5SDimitry Andric SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent()); 9350b57cec5SDimitry Andric ReciprocalDivisor->insertBefore(&I); 9360fca6ea1SDimitry Andric ReciprocalDivisor->setDebugLoc(I.getDebugLoc()); 9370b57cec5SDimitry Andric 9380b57cec5SDimitry Andric auto Product = 9390b57cec5SDimitry Andric BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor); 9400b57cec5SDimitry Andric Product->setFastMathFlags(I.getFastMathFlags()); 9410b57cec5SDimitry Andric SafetyInfo->insertInstructionTo(Product, I.getParent()); 9420b57cec5SDimitry Andric Product->insertAfter(&I); 9430fca6ea1SDimitry Andric 
Product->setDebugLoc(I.getDebugLoc()); 9440b57cec5SDimitry Andric I.replaceAllUsesWith(Product); 945349cc55cSDimitry Andric eraseInstruction(I, *SafetyInfo, MSSAU); 9460b57cec5SDimitry Andric 9470b57cec5SDimitry Andric hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), 948480093f4SDimitry Andric SafetyInfo, MSSAU, SE, ORE); 9490b57cec5SDimitry Andric HoistedInstructions.push_back(ReciprocalDivisor); 9500b57cec5SDimitry Andric Changed = true; 9510b57cec5SDimitry Andric continue; 9520b57cec5SDimitry Andric } 9530b57cec5SDimitry Andric 9540b57cec5SDimitry Andric auto IsInvariantStart = [&](Instruction &I) { 9550b57cec5SDimitry Andric using namespace PatternMatch; 9560b57cec5SDimitry Andric return I.use_empty() && 9570b57cec5SDimitry Andric match(&I, m_Intrinsic<Intrinsic::invariant_start>()); 9580b57cec5SDimitry Andric }; 9590b57cec5SDimitry Andric auto MustExecuteWithoutWritesBefore = [&](Instruction &I) { 9600b57cec5SDimitry Andric return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) && 9610b57cec5SDimitry Andric SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop); 9620b57cec5SDimitry Andric }; 9630b57cec5SDimitry Andric if ((IsInvariantStart(I) || isGuard(&I)) && 9640b57cec5SDimitry Andric CurLoop->hasLoopInvariantOperands(&I) && 9650b57cec5SDimitry Andric MustExecuteWithoutWritesBefore(I)) { 9660b57cec5SDimitry Andric hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, 967480093f4SDimitry Andric MSSAU, SE, ORE); 968480093f4SDimitry Andric HoistedInstructions.push_back(&I); 969480093f4SDimitry Andric Changed = true; 970480093f4SDimitry Andric continue; 971480093f4SDimitry Andric } 972480093f4SDimitry Andric 9730b57cec5SDimitry Andric if (PHINode *PN = dyn_cast<PHINode>(&I)) { 9740b57cec5SDimitry Andric if (CFH.canHoistPHI(PN)) { 9750b57cec5SDimitry Andric // Redirect incoming blocks first to ensure that we create hoisted 9760b57cec5SDimitry Andric // versions of those blocks before we hoist the phi. 
9770b57cec5SDimitry Andric for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i) 9780b57cec5SDimitry Andric PN->setIncomingBlock( 9790b57cec5SDimitry Andric i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i))); 9800b57cec5SDimitry Andric hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, 981480093f4SDimitry Andric MSSAU, SE, ORE); 9820b57cec5SDimitry Andric assert(DT->dominates(PN, BB) && "Conditional PHIs not expected"); 9830b57cec5SDimitry Andric Changed = true; 9840b57cec5SDimitry Andric continue; 9850b57cec5SDimitry Andric } 9860b57cec5SDimitry Andric } 9870b57cec5SDimitry Andric 98806c3fb27SDimitry Andric // Try to reassociate instructions so that part of computations can be 98906c3fb27SDimitry Andric // done out of loop. 99006c3fb27SDimitry Andric if (hoistArithmetics(I, *CurLoop, *SafetyInfo, MSSAU, AC, DT)) { 99106c3fb27SDimitry Andric Changed = true; 99206c3fb27SDimitry Andric continue; 99306c3fb27SDimitry Andric } 99406c3fb27SDimitry Andric 9950b57cec5SDimitry Andric // Remember possibly hoistable branches so we can actually hoist them 9960b57cec5SDimitry Andric // later if needed. 9970b57cec5SDimitry Andric if (BranchInst *BI = dyn_cast<BranchInst>(&I)) 9980b57cec5SDimitry Andric CFH.registerPossiblyHoistableBranch(BI); 9990b57cec5SDimitry Andric } 10000b57cec5SDimitry Andric } 10010b57cec5SDimitry Andric 10020b57cec5SDimitry Andric // If we hoisted instructions to a conditional block they may not dominate 10030b57cec5SDimitry Andric // their uses that weren't hoisted (such as phis where some operands are not 10040b57cec5SDimitry Andric // loop invariant). If so make them unconditional by moving them to their 10050b57cec5SDimitry Andric // immediate dominator. 
We iterate through the instructions in reverse order 10060b57cec5SDimitry Andric // which ensures that when we rehoist an instruction we rehoist its operands, 10075f757f3fSDimitry Andric // and also keep track of where in the block we are rehoisting to make sure 10080b57cec5SDimitry Andric // that we rehoist instructions before the instructions that use them. 10090b57cec5SDimitry Andric Instruction *HoistPoint = nullptr; 10100b57cec5SDimitry Andric if (ControlFlowHoisting) { 10110b57cec5SDimitry Andric for (Instruction *I : reverse(HoistedInstructions)) { 10120b57cec5SDimitry Andric if (!llvm::all_of(I->uses(), 10130b57cec5SDimitry Andric [&](Use &U) { return DT->dominates(I, U); })) { 10140b57cec5SDimitry Andric BasicBlock *Dominator = 10150b57cec5SDimitry Andric DT->getNode(I->getParent())->getIDom()->getBlock(); 10160b57cec5SDimitry Andric if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) { 10170b57cec5SDimitry Andric if (HoistPoint) 10180b57cec5SDimitry Andric assert(DT->dominates(Dominator, HoistPoint->getParent()) && 10190b57cec5SDimitry Andric "New hoist point expected to dominate old hoist point"); 10200b57cec5SDimitry Andric HoistPoint = Dominator->getTerminator(); 10210b57cec5SDimitry Andric } 10220b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LICM rehoisting to " 1023e8d8bef9SDimitry Andric << HoistPoint->getParent()->getNameOrAsOperand() 10240b57cec5SDimitry Andric << ": " << *I << "\n"); 10255f757f3fSDimitry Andric moveInstructionBefore(*I, HoistPoint->getIterator(), *SafetyInfo, MSSAU, 10265f757f3fSDimitry Andric SE); 10270b57cec5SDimitry Andric HoistPoint = I; 10280b57cec5SDimitry Andric Changed = true; 10290b57cec5SDimitry Andric } 10300b57cec5SDimitry Andric } 10310b57cec5SDimitry Andric } 1032349cc55cSDimitry Andric if (VerifyMemorySSA) 103381ad6265SDimitry Andric MSSAU.getMemorySSA()->verifyMemorySSA(); 10340b57cec5SDimitry Andric 10350b57cec5SDimitry Andric // Now that we've finished hoisting make sure that LI and DT are 
still 10360b57cec5SDimitry Andric // valid. 10378bcb0991SDimitry Andric #ifdef EXPENSIVE_CHECKS 10380b57cec5SDimitry Andric if (Changed) { 10390b57cec5SDimitry Andric assert(DT->verify(DominatorTree::VerificationLevel::Fast) && 10400b57cec5SDimitry Andric "Dominator tree verification failed"); 10410b57cec5SDimitry Andric LI->verify(*DT); 10420b57cec5SDimitry Andric } 10430b57cec5SDimitry Andric #endif 10440b57cec5SDimitry Andric 10450b57cec5SDimitry Andric return Changed; 10460b57cec5SDimitry Andric } 10470b57cec5SDimitry Andric 10480b57cec5SDimitry Andric // Return true if LI is invariant within scope of the loop. LI is invariant if 10490b57cec5SDimitry Andric // CurLoop is dominated by an invariant.start representing the same memory 10500b57cec5SDimitry Andric // location and size as the memory location LI loads from, and also the 10510b57cec5SDimitry Andric // invariant.start has no uses. 10520b57cec5SDimitry Andric static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, 10530b57cec5SDimitry Andric Loop *CurLoop) { 10545f757f3fSDimitry Andric Value *Addr = LI->getPointerOperand(); 10550fca6ea1SDimitry Andric const DataLayout &DL = LI->getDataLayout(); 1056e8d8bef9SDimitry Andric const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType()); 1057e8d8bef9SDimitry Andric 1058e8d8bef9SDimitry Andric // It is not currently possible for clang to generate an invariant.start 1059e8d8bef9SDimitry Andric // intrinsic with scalable vector types because we don't support thread local 1060e8d8bef9SDimitry Andric // sizeless types and we don't permit sizeless types in structs or classes. 1061e8d8bef9SDimitry Andric // Furthermore, even if support is added for this in future the intrinsic 1062e8d8bef9SDimitry Andric // itself is defined to have a size of -1 for variable sized objects. This 1063e8d8bef9SDimitry Andric // makes it impossible to verify if the intrinsic envelops our region of 1064e8d8bef9SDimitry Andric // interest. 
For example, both <vscale x 32 x i8> and <vscale x 16 x i8> 1065e8d8bef9SDimitry Andric // types would have a -1 parameter, but the former is clearly double the size 1066e8d8bef9SDimitry Andric // of the latter. 1067e8d8bef9SDimitry Andric if (LocSizeInBits.isScalable()) 1068e8d8bef9SDimitry Andric return false; 10690b57cec5SDimitry Andric 1070349cc55cSDimitry Andric // If we've ended up at a global/constant, bail. We shouldn't be looking at 1071349cc55cSDimitry Andric // uselists for non-local Values in a loop pass. 1072349cc55cSDimitry Andric if (isa<Constant>(Addr)) 1073349cc55cSDimitry Andric return false; 10740b57cec5SDimitry Andric 10750b57cec5SDimitry Andric unsigned UsesVisited = 0; 10760b57cec5SDimitry Andric // Traverse all uses of the load operand value, to see if invariant.start is 10770b57cec5SDimitry Andric // one of the uses, and whether it dominates the load instruction. 10780b57cec5SDimitry Andric for (auto *U : Addr->users()) { 10790b57cec5SDimitry Andric // Avoid traversing for Load operand with high number of users. 10800b57cec5SDimitry Andric if (++UsesVisited > MaxNumUsesTraversed) 10810b57cec5SDimitry Andric return false; 10820b57cec5SDimitry Andric IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); 10830b57cec5SDimitry Andric // If there are escaping uses of invariant.start instruction, the load maybe 10840b57cec5SDimitry Andric // non-invariant. 10850b57cec5SDimitry Andric if (!II || II->getIntrinsicID() != Intrinsic::invariant_start || 10860b57cec5SDimitry Andric !II->use_empty()) 10870b57cec5SDimitry Andric continue; 1088e8d8bef9SDimitry Andric ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0)); 1089e8d8bef9SDimitry Andric // The intrinsic supports having a -1 argument for variable sized objects 1090e8d8bef9SDimitry Andric // so we should check for that here. 
1091e8d8bef9SDimitry Andric if (InvariantSize->isNegative()) 1092e8d8bef9SDimitry Andric continue; 1093e8d8bef9SDimitry Andric uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8; 10940b57cec5SDimitry Andric // Confirm the invariant.start location size contains the load operand size 10950b57cec5SDimitry Andric // in bits. Also, the invariant.start should dominate the load, and we 10960b57cec5SDimitry Andric // should not hoist the load out of a loop that contains this dominating 10970b57cec5SDimitry Andric // invariant.start. 1098bdd1243dSDimitry Andric if (LocSizeInBits.getFixedValue() <= InvariantSizeInBits && 10990b57cec5SDimitry Andric DT->properlyDominates(II->getParent(), CurLoop->getHeader())) 11000b57cec5SDimitry Andric return true; 11010b57cec5SDimitry Andric } 11020b57cec5SDimitry Andric 11030b57cec5SDimitry Andric return false; 11040b57cec5SDimitry Andric } 11050b57cec5SDimitry Andric 11060b57cec5SDimitry Andric namespace { 11070b57cec5SDimitry Andric /// Return true if-and-only-if we know how to (mechanically) both hoist and 11080b57cec5SDimitry Andric /// sink a given instruction out of a loop. Does not address legality 11090b57cec5SDimitry Andric /// concerns such as aliasing or speculation safety. 11100b57cec5SDimitry Andric bool isHoistableAndSinkableInst(Instruction &I) { 11110b57cec5SDimitry Andric // Only these instructions are hoistable/sinkable. 
11120b57cec5SDimitry Andric return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) || 11135ffd83dbSDimitry Andric isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) || 11145ffd83dbSDimitry Andric isa<BinaryOperator>(I) || isa<SelectInst>(I) || 11155ffd83dbSDimitry Andric isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 11160b57cec5SDimitry Andric isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 11170b57cec5SDimitry Andric isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 11185ffd83dbSDimitry Andric isa<InsertValueInst>(I) || isa<FreezeInst>(I)); 11190b57cec5SDimitry Andric } 112081ad6265SDimitry Andric /// Return true if MSSA knows there are no MemoryDefs in the loop. 112181ad6265SDimitry Andric bool isReadOnly(const MemorySSAUpdater &MSSAU, const Loop *L) { 11220b57cec5SDimitry Andric for (auto *BB : L->getBlocks()) 112381ad6265SDimitry Andric if (MSSAU.getMemorySSA()->getBlockDefs(BB)) 11240b57cec5SDimitry Andric return false; 11250b57cec5SDimitry Andric return true; 11260b57cec5SDimitry Andric } 11270b57cec5SDimitry Andric 11280b57cec5SDimitry Andric /// Return true if I is the only Instruction with a MemoryAccess in L. 
bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
                        const MemorySSAUpdater &MSSAU) {
  // Walk every MemorySSA access list in the loop. MemoryPhis are ignored;
  // any non-phi access that belongs to some other instruction, or a second
  // non-phi access (NotAPhi++ == 1 fires on the second one), disqualifies.
  for (auto *BB : L->getBlocks())
    if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
      int NotAPhi = 0;
      for (const auto &Acc : *Accs) {
        if (isa<MemoryPhi>(&Acc))
          continue;
        const auto *MUD = cast<MemoryUseOrDef>(&Acc);
        if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
          return false;
      }
    }
  return true;
}
} // namespace

/// Ask MemorySSA's skip-self walker for the clobbering access of \p MA,
/// counting the query against the per-loop cap tracked in \p Flags. Once the
/// cap is hit we stop paying for walks and conservatively return the plain
/// defining access instead.
static MemoryAccess *getClobberingMemoryAccess(MemorySSA &MSSA,
                                               BatchAAResults &BAA,
                                               SinkAndHoistLICMFlags &Flags,
                                               MemoryUseOrDef *MA) {
  // See declaration of SetLicmMssaOptCap for usage details.
  if (Flags.tooManyClobberingCalls())
    return MA->getDefiningAccess();

  MemoryAccess *Source =
      MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(MA, BAA);
  Flags.incrementClobberingCalls();
  return Source;
}

/// Decide whether \p I may legally be hoisted or sunk out of \p CurLoop as
/// far as mechanics and aliasing are concerned. Loads, calls, fences and
/// stores each get kind-specific MemorySSA / alias-analysis checks below.
/// Fault safety (speculation) is deliberately NOT checked here; that is the
/// caller's responsibility.
bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                              Loop *CurLoop, MemorySSAUpdater &MSSAU,
                              bool TargetExecutesOncePerLoop,
                              SinkAndHoistLICMFlags &Flags,
                              OptimizationRemarkEmitter *ORE) {
  // If we don't understand the instruction, bail early.
  if (!isHoistableAndSinkableInst(I))
    return false;

  MemorySSA *MSSA = MSSAU.getMemorySSA();
  // Loads have extra constraints we have to verify before we can hoist them.
  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
    if (!LI->isUnordered())
      return false; // Don't sink/hoist volatile or ordered atomic loads!

    // Loads from constant memory are always safe to move, even if they end up
    // in the same alias set as something that ends up being modified.
    if (!isModSet(AA->getModRefInfoMask(LI->getOperand(0))))
      return true;
    if (LI->hasMetadata(LLVMContext::MD_invariant_load))
      return true;

    if (LI->isAtomic() && !TargetExecutesOncePerLoop)
      return false; // Don't risk duplicating unordered loads

    // This checks for an invariant.start dominating the load.
    if (isLoadInvariantInLoop(LI, DT, CurLoop))
      return true;

    auto MU = cast<MemoryUse>(MSSA->getMemoryAccess(LI));

    bool InvariantGroup = LI->hasMetadata(LLVMContext::MD_invariant_group);

    bool Invalidated = pointerInvalidatedByLoop(
        MSSA, MU, CurLoop, I, Flags, InvariantGroup);
    // Check loop-invariant address because this may also be a sinkable load
    // whose address is not necessarily loop-invariant.
    if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
      ORE->emit([&]() {
        return OptimizationRemarkMissed(
                   DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
               << "failed to move load with loop-invariant address "
                  "because the loop may invalidate its value";
      });

    return !Invalidated;
  } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
    // Don't sink or hoist dbg info; it's legal, but not useful.
    if (isa<DbgInfoIntrinsic>(I))
      return false;

    // Don't sink calls which can throw.
    if (CI->mayThrow())
      return false;

    // Convergent attribute has been used on operations that involve
    // inter-thread communication which results are implicitly affected by the
    // enclosing control flows. It is not safe to hoist or sink such operations
    // across control flow.
    if (CI->isConvergent())
      return false;

    // FIXME: Current LLVM IR semantics don't work well with coroutines and
    // thread local globals. We currently treat getting the address of a thread
    // local global as not accessing memory, even though it may not be a
    // constant throughout a function with coroutines. Remove this check after
    // we better model semantics of thread local globals.
    if (CI->getFunction()->isPresplitCoroutine())
      return false;

    using namespace PatternMatch;
    if (match(CI, m_Intrinsic<Intrinsic::assume>()))
      // Assumes don't actually alias anything or throw
      return true;

    // Handle simple cases by querying alias analysis.
    MemoryEffects Behavior = AA->getMemoryEffects(CI);

    if (Behavior.doesNotAccessMemory())
      return true;
    if (Behavior.onlyReadsMemory()) {
      // A readonly argmemonly function only reads from memory pointed to by
      // its arguments with arbitrary offsets. If we can prove there are no
      // writes to this memory in the loop, we can hoist or sink.
      if (Behavior.onlyAccessesArgPointees()) {
        // TODO: expand to writeable arguments
        for (Value *Op : CI->args())
          if (Op->getType()->isPointerTy() &&
              pointerInvalidatedByLoop(
                  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
                  Flags, /*InvariantGroup=*/false))
            return false;
        return true;
      }

      // If this call only reads from memory and there are no writes to memory
      // in the loop, we can hoist or sink the call as appropriate.
      if (isReadOnly(MSSAU, CurLoop))
        return true;
    }

    // FIXME: This should use mod/ref information to see if we can hoist or
    // sink the call.

    return false;
  } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
    // Fences alias (most) everything to provide ordering.  For the moment,
    // just give up if there are any other memory operations in the loop.
    return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
  } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
    if (!SI->isUnordered())
      return false; // Don't sink/hoist volatile or ordered atomic store!

    // We can only hoist a store that we can prove writes a value which is not
    // read or overwritten within the loop.  For those cases, we fallback to
    // load store promotion instead.  TODO: We can extend this to cases where
    // there is exactly one write to the location and that write dominates an
    // arbitrary number of reads in the loop.
    if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
      return true;
    // If there are more accesses than the Promotion cap, then give up as we're
    // not walking a list that long.
    if (Flags.tooManyMemoryAccesses())
      return false;

    auto *SIMD = MSSA->getMemoryAccess(SI);
    BatchAAResults BAA(*AA);
    auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, SIMD);
    // Make sure there are no clobbers inside the loop.
    if (!MSSA->isLiveOnEntryDef(Source) &&
        CurLoop->contains(Source->getBlock()))
      return false;

    // If there are interfering Uses (i.e. their defining access is in the
    // loop), or ordered loads (stored as Defs!), don't move this store.
    // Could do better here, but this is conservatively correct.
    // TODO: Cache set of Uses on the first walk in runOnLoop, update when
    // moving accesses. Can also extend to dominating uses.
    for (auto *BB : CurLoop->getBlocks())
      if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
        for (const auto &MA : *Accesses)
          if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
            auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
                                                 const_cast<MemoryUse *>(MU));
            if (!MSSA->isLiveOnEntryDef(MD) &&
                CurLoop->contains(MD->getBlock()))
              return false;
            // Disable hoisting past potentially interfering loads. Optimized
            // Uses may point to an access outside the loop, as getClobbering
            // checks the previous iteration when walking the backedge.
            // FIXME: More precise: no Uses that alias SI.
            if (!Flags.getIsSink() && !MSSA->dominates(SIMD, MU))
              return false;
          } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
            // A load appearing as a MemoryDef must be an ordered load (only
            // ordered loads are modeled as Defs here); bail out on it.
            if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
              (void)LI; // Silence warning.
              assert(!LI->isUnordered() && "Expected unordered load");
              return false;
            }
            // Any call, while it may not be clobbering SI, it may be a use.
            if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
              // Check if the call may read from the memory location written
              // to by SI. Check CI's attributes and arguments; the number of
              // such checks performed is limited above by NoOfMemAccTooLarge.
              ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
              if (isModOrRefSet(MRI))
                return false;
            }
          }
      }
    return true;
  }

  assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");

  // We've established mechanical ability and aliasing, it's up to the caller
  // to check fault safety
  return true;
}

/// Returns true if a PHINode is a trivially replaceable with an
/// Instruction.
/// This is true when all incoming values are that instruction.
/// This pattern occurs most often with LCSSA PHI nodes.
///
static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
  for (const Value *IncValue : PN.incoming_values())
    if (IncValue != &I)
      return false;

  return true;
}

/// Return true if the instruction is foldable in the loop.
135406c3fb27SDimitry Andric static bool isFoldableInLoop(const Instruction &I, const Loop *CurLoop, 13550b57cec5SDimitry Andric const TargetTransformInfo *TTI) { 135606c3fb27SDimitry Andric if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { 1357bdd1243dSDimitry Andric InstructionCost CostI = 1358bdd1243dSDimitry Andric TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); 1359bdd1243dSDimitry Andric if (CostI != TargetTransformInfo::TCC_Free) 13600b57cec5SDimitry Andric return false; 1361bdd1243dSDimitry Andric // For a GEP, we cannot simply use getInstructionCost because currently 1362bdd1243dSDimitry Andric // it optimistically assumes that a GEP will fold into addressing mode 13630b57cec5SDimitry Andric // regardless of its users. 13640b57cec5SDimitry Andric const BasicBlock *BB = GEP->getParent(); 13650b57cec5SDimitry Andric for (const User *U : GEP->users()) { 13660b57cec5SDimitry Andric const Instruction *UI = cast<Instruction>(U); 13670b57cec5SDimitry Andric if (CurLoop->contains(UI) && 13680b57cec5SDimitry Andric (BB != UI->getParent() || 13690b57cec5SDimitry Andric (!isa<StoreInst>(UI) && !isa<LoadInst>(UI)))) 13700b57cec5SDimitry Andric return false; 13710b57cec5SDimitry Andric } 13720b57cec5SDimitry Andric return true; 1373bdd1243dSDimitry Andric } 1374bdd1243dSDimitry Andric 137506c3fb27SDimitry Andric return false; 13760b57cec5SDimitry Andric } 13770b57cec5SDimitry Andric 13780b57cec5SDimitry Andric /// Return true if the only users of this instruction are outside of 13790b57cec5SDimitry Andric /// the loop. If this is true, we can sink the instruction to the exit 13800b57cec5SDimitry Andric /// blocks of the loop. 13810b57cec5SDimitry Andric /// 13820b57cec5SDimitry Andric /// We also return true if the instruction could be folded away in lowering. 13830b57cec5SDimitry Andric /// (e.g., a GEP can be folded into a load as an addressing mode in the loop). 
138406c3fb27SDimitry Andric static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop, 13850b57cec5SDimitry Andric const LoopSafetyInfo *SafetyInfo, 138606c3fb27SDimitry Andric TargetTransformInfo *TTI, 138706c3fb27SDimitry Andric bool &FoldableInLoop, bool LoopNestMode) { 13880b57cec5SDimitry Andric const auto &BlockColors = SafetyInfo->getBlockColors(); 138906c3fb27SDimitry Andric bool IsFoldable = isFoldableInLoop(I, CurLoop, TTI); 13900b57cec5SDimitry Andric for (const User *U : I.users()) { 13910b57cec5SDimitry Andric const Instruction *UI = cast<Instruction>(U); 13920b57cec5SDimitry Andric if (const PHINode *PN = dyn_cast<PHINode>(UI)) { 13930b57cec5SDimitry Andric const BasicBlock *BB = PN->getParent(); 13940b57cec5SDimitry Andric // We cannot sink uses in catchswitches. 13950b57cec5SDimitry Andric if (isa<CatchSwitchInst>(BB->getTerminator())) 13960b57cec5SDimitry Andric return false; 13970b57cec5SDimitry Andric 13980b57cec5SDimitry Andric // We need to sink a callsite to a unique funclet. Avoid sinking if the 13990b57cec5SDimitry Andric // phi use is too muddled. 
14000b57cec5SDimitry Andric if (isa<CallInst>(I)) 14010b57cec5SDimitry Andric if (!BlockColors.empty() && 14020b57cec5SDimitry Andric BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1) 14030b57cec5SDimitry Andric return false; 1404349cc55cSDimitry Andric 1405349cc55cSDimitry Andric if (LoopNestMode) { 1406349cc55cSDimitry Andric while (isa<PHINode>(UI) && UI->hasOneUser() && 1407349cc55cSDimitry Andric UI->getNumOperands() == 1) { 1408349cc55cSDimitry Andric if (!CurLoop->contains(UI)) 1409349cc55cSDimitry Andric break; 1410349cc55cSDimitry Andric UI = cast<Instruction>(UI->user_back()); 1411349cc55cSDimitry Andric } 1412349cc55cSDimitry Andric } 14130b57cec5SDimitry Andric } 14140b57cec5SDimitry Andric 14150b57cec5SDimitry Andric if (CurLoop->contains(UI)) { 141606c3fb27SDimitry Andric if (IsFoldable) { 141706c3fb27SDimitry Andric FoldableInLoop = true; 14180b57cec5SDimitry Andric continue; 14190b57cec5SDimitry Andric } 14200b57cec5SDimitry Andric return false; 14210b57cec5SDimitry Andric } 14220b57cec5SDimitry Andric } 14230b57cec5SDimitry Andric return true; 14240b57cec5SDimitry Andric } 14250b57cec5SDimitry Andric 14265ffd83dbSDimitry Andric static Instruction *cloneInstructionInExitBlock( 14270b57cec5SDimitry Andric Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, 142881ad6265SDimitry Andric const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU) { 14290b57cec5SDimitry Andric Instruction *New; 14300b57cec5SDimitry Andric if (auto *CI = dyn_cast<CallInst>(&I)) { 14310b57cec5SDimitry Andric const auto &BlockColors = SafetyInfo->getBlockColors(); 14320b57cec5SDimitry Andric 14330b57cec5SDimitry Andric // Sinking call-sites need to be handled differently from other 14340b57cec5SDimitry Andric // instructions. The cloned call-site needs a funclet bundle operand 14350b57cec5SDimitry Andric // appropriate for its location in the CFG. 
    // `I` is a call instruction: clone it while dropping any "funclet"
    // operand bundle, since the clone may land in a block with a different
    // EH color than the original call site.
    SmallVector<OperandBundleDef, 1> OpBundles;
    for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
         BundleIdx != BundleEnd; ++BundleIdx) {
      OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
      if (Bundle.getTagID() == LLVMContext::OB_funclet)
        continue;

      OpBundles.emplace_back(Bundle);
    }

    if (!BlockColors.empty()) {
      // Re-attach a funclet bundle matching the exit block's (unique) EH
      // color so the cloned call stays associated with the right funclet pad.
      const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
      assert(CV.size() == 1 && "non-unique color for exit block!");
      BasicBlock *BBColor = CV.front();
      Instruction *EHPad = BBColor->getFirstNonPHI();
      if (EHPad->isEHPad())
        OpBundles.emplace_back("funclet", EHPad);
    }

    New = CallInst::Create(CI, OpBundles);
    New->copyMetadata(*CI);
  } else {
    New = I.clone();
  }

  New->insertInto(&ExitBlock, ExitBlock.getFirstInsertionPt());
  if (!I.getName().empty())
    New->setName(I.getName() + ".le");

  if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) {
    // Create a new MemoryAccess and let MemorySSA set its defining access.
    // After running some passes, MemorySSA might be outdated, and the
    // instruction `I` may have become a non-memory touching instruction.
    MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB(
        New, nullptr, New->getParent(), MemorySSA::Beginning,
        /*CreationMustSucceed=*/false);
    if (NewMemAcc) {
      if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
        MSSAU.insertDef(MemDef, /*RenameUses=*/true);
      else {
        auto *MemUse = cast<MemoryUse>(NewMemAcc);
        MSSAU.insertUse(MemUse, /*RenameUses=*/true);
      }
    }
  }

  // Build LCSSA PHI nodes for any in-loop operands (if legal). Note that
  // this is particularly cheap because we can rip off the PHI node that we're
  // replacing for the number and blocks of the predecessors.
  // OPT: If this shows up in a profile, we can instead finish sinking all
  // invariant instructions, and then walk their operands to re-establish
  // LCSSA. That will eliminate creating PHI nodes just to nuke them when
  // sinking bottom-up.
  for (Use &Op : New->operands())
    if (LI->wouldBeOutOfLoopUseRequiringLCSSA(Op.get(), PN.getParent())) {
      auto *OInst = cast<Instruction>(Op.get());
      PHINode *OpPN =
          PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
                          OInst->getName() + ".lcssa");
      OpPN->insertBefore(ExitBlock.begin());
      for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
        OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
      Op = OpPN;
    }
  return New;
}

/// Erase \p I from its parent block while keeping the auxiliary analyses in
/// sync: the MemorySSA access is removed first (it refers to \p I), then the
/// safety info forgets the instruction, then the IR is actually deleted.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
                             MemorySSAUpdater &MSSAU) {
  MSSAU.removeMemoryAccess(&I);
  SafetyInfo.removeInstruction(&I);
  I.eraseFromParent();
}

/// Move \p I so it is immediately before \p Dest, updating the implicit
/// control flow safety info, MemorySSA, and (if provided) ScalarEvolution's
/// cached block/loop dispositions, which become stale once \p I moves.
static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
                                  ICFLoopSafetyInfo &SafetyInfo,
                                  MemorySSAUpdater &MSSAU,
                                  ScalarEvolution *SE) {
  SafetyInfo.removeInstruction(&I);
  SafetyInfo.insertInstructionTo(&I, Dest->getParent());
  I.moveBefore(*Dest->getParent(), Dest);
  if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
          MSSAU.getMemorySSA()->getMemoryAccess(&I)))
    MSSAU.moveToPlace(OldMemAcc, Dest->getParent(),
                      MemorySSA::BeforeTerminator);
  if (SE)
    SE->forgetBlockAndLoopDispositions(&I);
}

/// Return the sunk copy of \p I feeding the trivially replaceable PHI \p TPN,
/// cloning \p I into the PHI's exit block on first request. \p SunkCopies
/// memoizes one clone per exit block so repeated PHIs share it.
static Instruction *sinkThroughTriviallyReplaceablePHI(
    PHINode *TPN, Instruction *I, LoopInfo *LI,
    SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
    const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
    MemorySSAUpdater &MSSAU) {
  assert(isTriviallyReplaceablePHI(*TPN, *I) &&
         "Expect only trivially replaceable PHI");
  BasicBlock *ExitBlock = TPN->getParent();
  Instruction *New;
  auto It = SunkCopies.find(ExitBlock);
  if (It != SunkCopies.end())
    New = It->second;
  else
    New = SunkCopies[ExitBlock] = cloneInstructionInExitBlock(
        *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
  return New;
}

/// Return true if every predecessor of \p PN's block can be split, which is a
/// precondition for splitPredecessorsOfLoopExit() below.
static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
  BasicBlock *BB = PN->getParent();
  if (!BB->canSplitPredecessors())
    return false;
  // It's not impossible to split EHPad blocks, but if BlockColors already
  // exist it requires updating BlockColors for all offspring blocks
  // accordingly. By skipping such corner case, we can make updating
  // BlockColors after splitting predecessor fairly simple.
  if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
    return false;
  // Edges from indirectbr terminators cannot be split.
  for (BasicBlock *BBPred : predecessors(BB)) {
    if (isa<IndirectBrInst>(BBPred->getTerminator()))
      return false;
  }
  return true;
}

/// Split every in-loop predecessor of the exit block holding \p PN so that
/// each incoming value of \p PN arrives through its own single-predecessor
/// block, making the resulting PHIs trivially replaceable.
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
                                        LoopInfo *LI, const Loop *CurLoop,
                                        LoopSafetyInfo *SafetyInfo,
                                        MemorySSAUpdater *MSSAU) {
#ifndef NDEBUG
  SmallVector<BasicBlock *, 32> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);
  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
                                             ExitBlocks.end());
#endif
  BasicBlock *ExitBB = PN->getParent();
  assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");

  // Split predecessors of the loop exit to make instructions in the loop are
  // exposed to exit blocks through trivially replaceable PHIs while keeping
  // the loop in the canonical form where each predecessor of each exit block
  // should be contained within the loop. For example, this will convert the
  // loop below from
  //
  // LB1:
  //   %v1 =
  //   br %LE, %LB2
  // LB2:
  //   %v2 =
  //   br %LE, %LB1
  // LE:
  //   %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
  //
  // to
  //
  // LB1:
  //   %v1 =
  //   br %LE.split, %LB2
  // LB2:
  //   %v2 =
  //   br %LE.split2, %LB1
  // LE.split:
  //   %p1 = phi [%v1, %LB1] <-- trivially replaceable
  //   br %LE
  // LE.split2:
  //   %p2 = phi [%v2, %LB2] <-- trivially replaceable
  //   br %LE
  // LE:
  //   %p = phi [%p1, %LE.split], [%p2, %LE.split2]
  //
  const auto &BlockColors = SafetyInfo->getBlockColors();
  SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
  while (!PredBBs.empty()) {
    BasicBlock *PredBB = *PredBBs.begin();
    assert(CurLoop->contains(PredBB) &&
           "Expect all predecessors are in the loop");
    if (PN->getBasicBlockIndex(PredBB) >= 0) {
      BasicBlock *NewPred = SplitBlockPredecessors(
          ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
      // Since we do not allow splitting EH-block with BlockColors in
      // canSplitPredecessors(), we can simply assign predecessor's color to
      // the new block.
      if (!BlockColors.empty())
        // Grab a reference to the ColorVector to be inserted before getting
        // the reference to the vector we are copying because inserting the
        // new element in BlockColors might cause the map to be reallocated.
        SafetyInfo->copyColors(NewPred, PredBB);
    }
    PredBBs.remove(PredBB);
  }
}

/// When an instruction is found to only be used outside of the loop, this
/// function moves it to the exit blocks and patches up SSA form as needed.
/// This method is guaranteed to remove the original instruction from its
/// position, and may either delete it or move it to outside of the loop.
///
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
                 MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE) {
  bool Changed = false;
  LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");

  // Iterate over users to be ready for actual sinking. Replace users via
  // unreachable blocks with poison and make all user PHIs trivially
  // replaceable.
  SmallPtrSet<Instruction *, 8> VisitedUsers;
  for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
    auto *User = cast<Instruction>(*UI);
    Use &U = UI.getUse();
    // Advance before any mutation below can disturb the current use.
    ++UI;

    if (VisitedUsers.count(User) || CurLoop->contains(User))
      continue;

    if (!DT->isReachableFromEntry(User->getParent())) {
      U = PoisonValue::get(I.getType());
      Changed = true;
      continue;
    }

    // The user must be a PHI node.
    PHINode *PN = cast<PHINode>(User);

    // Surprisingly, instructions can be used outside of loops without any
    // exits. This can only happen in PHI nodes if the incoming block is
    // unreachable.
    BasicBlock *BB = PN->getIncomingBlock(U);
    if (!DT->isReachableFromEntry(BB)) {
      U = PoisonValue::get(I.getType());
      Changed = true;
      continue;
    }

    VisitedUsers.insert(PN);
    if (isTriviallyReplaceablePHI(*PN, I))
      continue;

    if (!canSplitPredecessors(PN, SafetyInfo))
      return Changed;

    // Split predecessors of the PHI so that we can make users trivially
    // replaceable.
    splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, &MSSAU);

    // Should rebuild the iterators, as they may be invalidated by
    // splitPredecessorsOfLoopExit().
    UI = I.user_begin();
    UE = I.user_end();
  }

  if (VisitedUsers.empty())
    return Changed;

  ORE->emit([&]() {
    return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
           << "sinking " << ore::NV("Inst", &I);
  });
  if (isa<LoadInst>(I))
    ++NumMovedLoads;
  else if (isa<CallInst>(I))
    ++NumMovedCalls;
  ++NumSunk;

#ifndef NDEBUG
  SmallVector<BasicBlock *, 32> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);
  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
                                             ExitBlocks.end());
#endif

  // Clones of this instruction. Don't create more than one per exit block!
  SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;

  // If this instruction is only used outside of the loop, then all users are
  // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
  // the instruction.
  // First check if I is worth sinking for all uses. Sink only when it is
  // worth across all uses.
  SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
  for (auto *UI : Users) {
    auto *User = cast<Instruction>(UI);

    if (CurLoop->contains(User))
      continue;

    PHINode *PN = cast<PHINode>(User);
    assert(ExitBlockSet.count(PN->getParent()) &&
           "The LCSSA PHI is not in an exit block!");

    // The PHI must be trivially replaceable.
    Instruction *New = sinkThroughTriviallyReplaceablePHI(
        PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
    // As we sink the instruction out of the BB, drop its debug location.
    New->dropLocation();
    PN->replaceAllUsesWith(New);
    eraseInstruction(*PN, *SafetyInfo, MSSAU);
    Changed = true;
  }
  return Changed;
}

/// When an instruction is found to only use loop invariant operands that
/// is safe to hoist, this function is called to do the dirty work.
///
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
                  MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
                  OptimizationRemarkEmitter *ORE) {
  LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
                    << I << "\n");
  ORE->emit([&]() {
    return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
                                                         << ore::NV("Inst", &I);
  });

  // Metadata can be dependent on conditions we are hoisting above.
  // Conservatively strip all metadata on the instruction unless we were
  // guaranteed to execute I if we entered the loop, in which case the metadata
  // is valid in the loop preheader.
  // Similarly, if I is a call and it is not guaranteed to execute in the loop,
  // then moving to the preheader means we should strip attributes on the call
  // that can cause UB since we may be hoisting above conditions that allowed
  // inferring those attributes. They may not be valid at the preheader.
  if ((I.hasMetadataOtherThanDebugLoc() || isa<CallInst>(I)) &&
      // The check on hasMetadataOtherThanDebugLoc is to prevent us from
      // burning time in isGuaranteedToExecute if we don't actually have
      // anything to drop. It is a compile time optimization, not required for
      // correctness.
      !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
    I.dropUBImplyingAttrsAndMetadata();

  if (isa<PHINode>(I))
    // Move the new node to the end of the phi list in the destination block.
    moveInstructionBefore(I, Dest->getFirstNonPHIIt(), *SafetyInfo, MSSAU, SE);
  else
    // Move the new node to the destination block, before its terminator.
    moveInstructionBefore(I, Dest->getTerminator()->getIterator(), *SafetyInfo,
                          MSSAU, SE);

  I.updateLocationAfterHoist();

  if (isa<LoadInst>(I))
    ++NumMovedLoads;
  else if (isa<CallInst>(I))
    ++NumMovedCalls;
  ++NumHoisted;
}

/// Only sink or hoist an instruction if it is not a trapping instruction,
/// or if the instruction is known not to trap when moved to the preheader,
/// or if it is a trapping instruction and is guaranteed to execute.
static bool isSafeToExecuteUnconditionally(
    Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
    const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
    OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
    AssumptionCache *AC, bool AllowSpeculation) {
  // Speculation is the cheap test; only fall back to the guaranteed-to-execute
  // analysis when speculation is disallowed or fails.
  if (AllowSpeculation &&
      isSafeToSpeculativelyExecute(&Inst, CtxI, AC, DT, TLI))
    return true;

  bool GuaranteedToExecute =
      SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);

  if (!GuaranteedToExecute) {
    // Emit a missed-optimization remark for the interesting case: a load from
    // a loop-invariant address that we still could not hoist.
    auto *LI = dyn_cast<LoadInst>(&Inst);
    if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
      ORE->emit([&]() {
        return OptimizationRemarkMissed(
                   DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
               << "failed to hoist load with loop-invariant address "
                  "because load is conditionally executed";
      });
  }

  return GuaranteedToExecute;
}

namespace {
/// SSAUpdater-driven rewriter used by scalar promotion: rewrites in-loop
/// loads/stores of a promoted pointer into SSA values and, when legal,
/// materializes a single store of the live-out value in each loop exit block.
class LoopPromoter : public LoadAndStorePromoter {
  Value *SomePtr; // Designated pointer to store to.
  SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
  SmallVectorImpl<BasicBlock::iterator> &LoopInsertPts;
  SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
  PredIteratorCache &PredCache;
  MemorySSAUpdater &MSSAU;
  LoopInfo &LI;
  DebugLoc DL;
  Align Alignment;
  bool UnorderedAtomic;
  AAMDNodes AATags;
  ICFLoopSafetyInfo &SafetyInfo;
  bool CanInsertStoresInExitBlocks;
  ArrayRef<const Instruction *> Uses;

  // We're about to add a use of V in a loop exit block. Insert an LCSSA phi
  // (if legal) if doing so would add an out-of-loop use to an instruction
  // defined in-loop.
  Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
    if (!LI.wouldBeOutOfLoopUseRequiringLCSSA(V, BB))
      return V;

    Instruction *I = cast<Instruction>(V);
    // We need to create an LCSSA PHI node for the incoming value and
    // store that.
    PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
                                  I->getName() + ".lcssa");
    PN->insertBefore(BB->begin());
    for (BasicBlock *Pred : PredCache.get(BB))
      PN->addIncoming(I, Pred);
    return PN;
  }

public:
  LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
               SmallVectorImpl<BasicBlock *> &LEB,
               SmallVectorImpl<BasicBlock::iterator> &LIP,
               SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
               MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
               Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
               ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
      : LoadAndStorePromoter(Insts, S), SomePtr(SP), LoopExitBlocks(LEB),
        LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), PredCache(PIC), MSSAU(MSSAU),
        LI(li), DL(std::move(dl)), Alignment(Alignment),
        UnorderedAtomic(UnorderedAtomic), AATags(AATags),
        SafetyInfo(SafetyInfo),
        CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks), Uses(Insts) {}

  void insertStoresInLoopExitBlocks() {
    // Insert stores after in the loop exit blocks. Each exit block gets a
    // store of the live-out values that feed them. Since we've already told
    // the SSA updater about the defs in the loop and the preheader
    // definition, it is all set and we can start using it.
    DIAssignID *NewID = nullptr;
    for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
      BasicBlock *ExitBlock = LoopExitBlocks[i];
      Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
      // Both the stored value and the pointer may themselves need LCSSA phis
      // in the exit block before we can reference them there.
      LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
      Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
      BasicBlock::iterator InsertPos = LoopInsertPts[i];
      StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
      if (UnorderedAtomic)
        NewSI->setOrdering(AtomicOrdering::Unordered);
      NewSI->setAlignment(Alignment);
      NewSI->setDebugLoc(DL);
      // Attach DIAssignID metadata to the new store, generating it on the
      // first loop iteration.
      if (i == 0) {
        // NewSI will have its DIAssignID set here if there are any stores in
        // Uses with a DIAssignID attachment. This merged ID will then be
        // attached to the other inserted stores (in the branch below).
        NewSI->mergeDIAssignID(Uses);
        NewID = cast_or_null<DIAssignID>(
            NewSI->getMetadata(LLVMContext::MD_DIAssignID));
      } else {
        // Attach the DIAssignID (or nullptr) merged from Uses in the branch
        // above.
        NewSI->setMetadata(LLVMContext::MD_DIAssignID, NewID);
      }

      if (AATags)
        NewSI->setAAMetadata(AATags);

      // Keep MemorySSA in sync; MSSAInsertPts[i] remembers the last access we
      // created in this exit block so subsequent stores chain after it.
      MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
      MemoryAccess *NewMemAcc;
      if (!MSSAInsertPoint) {
        NewMemAcc = MSSAU.createMemoryAccessInBB(
            NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
      } else {
        NewMemAcc =
            MSSAU.createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
      }
      MSSAInsertPts[i] = NewMemAcc;
      MSSAU.insertDef(cast<MemoryDef>(NewMemAcc), true);
      // FIXME: true for safety, false may still be correct.
    }
  }

  void doExtraRewritesBeforeFinalDeletion() override {
    if (CanInsertStoresInExitBlocks)
      insertStoresInLoopExitBlocks();
  }

  void instructionDeleted(Instruction *I) const override {
    SafetyInfo.removeInstruction(I);
    MSSAU.removeMemoryAccess(I);
  }

  bool shouldDelete(Instruction *I) const override {
    // Stores are only deletable when we were allowed to re-materialize them
    // in the exit blocks; everything else (the loads) is always rewritten.
    if (isa<StoreInst>(I))
      return CanInsertStoresInExitBlocks;
    return true;
  }
};

/// Return true if \p V is not captured before or inside loop \p L.
bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
                                DominatorTree *DT) {
  // We can perform the captured-before check against any instruction in the
  // loop header, as the loop header is reachable from any instruction inside
  // the loop.
  // TODO: ReturnCaptures=true shouldn't be necessary here.
  return !PointerMayBeCapturedBefore(V, /* ReturnCaptures */ true,
                                     /* StoreCaptures */ true,
                                     L->getHeader()->getTerminator(), DT);
}

/// Return true if we can prove that a caller cannot inspect the object if an
/// unwind occurs inside the loop.
bool isNotVisibleOnUnwindInLoop(const Value *Object, const Loop *L,
                                DominatorTree *DT) {
  bool RequiresNoCaptureBeforeUnwind;
  if (!isNotVisibleOnUnwind(Object, RequiresNoCaptureBeforeUnwind))
    return false;

  // Some objects (e.g. allocas) are only hidden on unwind as long as they have
  // not been captured before the unwind point; check capture in that case.
  return !RequiresNoCaptureBeforeUnwind ||
         isNotCapturedBeforeOrInLoop(Object, L, DT);
}

/// Return true if \p Object can only be accessed by the current thread while
/// the loop executes (function-local and uncaptured, or a single-threaded
/// environment).
bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT,
                         TargetTransformInfo *TTI) {
  // The object must be function-local to start with, and then not captured
  // before/in the loop.
  // NOTE(review): `SingleThread` appears to be a file-level flag declared
  // outside this excerpt — confirm it is the LICM single-thread option.
  return (isIdentifiedFunctionLocal(Object) &&
          isNotCapturedBeforeOrInLoop(Object, L, DT)) ||
         (TTI->isSingleThreaded() || SingleThread);
}

} // namespace

/// Try to promote memory values to scalars by sinking stores out of the
/// loop and moving loads to before the loop.  We do this by looping over
/// the stores in the loop, looking for stores to Must pointers which are
/// loop invariant.
19660b57cec5SDimitry Andric /// 19670b57cec5SDimitry Andric bool llvm::promoteLoopAccessesToScalars( 19680b57cec5SDimitry Andric const SmallSetVector<Value *, 8> &PointerMustAliases, 19690b57cec5SDimitry Andric SmallVectorImpl<BasicBlock *> &ExitBlocks, 19705f757f3fSDimitry Andric SmallVectorImpl<BasicBlock::iterator> &InsertPts, 19710b57cec5SDimitry Andric SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC, 1972bdd1243dSDimitry Andric LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, 1973bdd1243dSDimitry Andric const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop, 1974bdd1243dSDimitry Andric MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo, 1975bdd1243dSDimitry Andric OptimizationRemarkEmitter *ORE, bool AllowSpeculation, 1976bdd1243dSDimitry Andric bool HasReadsOutsideSet) { 19770b57cec5SDimitry Andric // Verify inputs. 19780b57cec5SDimitry Andric assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && 1979e8d8bef9SDimitry Andric SafetyInfo != nullptr && 19800b57cec5SDimitry Andric "Unexpected Input to promoteLoopAccessesToScalars"); 19810b57cec5SDimitry Andric 1982bdd1243dSDimitry Andric LLVM_DEBUG({ 1983bdd1243dSDimitry Andric dbgs() << "Trying to promote set of must-aliased pointers:\n"; 1984bdd1243dSDimitry Andric for (Value *Ptr : PointerMustAliases) 1985bdd1243dSDimitry Andric dbgs() << " " << *Ptr << "\n"; 1986bdd1243dSDimitry Andric }); 1987bdd1243dSDimitry Andric ++NumPromotionCandidates; 1988bdd1243dSDimitry Andric 19890b57cec5SDimitry Andric Value *SomePtr = *PointerMustAliases.begin(); 19900b57cec5SDimitry Andric BasicBlock *Preheader = CurLoop->getLoopPreheader(); 19910b57cec5SDimitry Andric 19920b57cec5SDimitry Andric // It is not safe to promote a load/store from the loop if the load/store is 19930b57cec5SDimitry Andric // conditional. 
For example, turning: 19940b57cec5SDimitry Andric // 19950b57cec5SDimitry Andric // for () { if (c) *P += 1; } 19960b57cec5SDimitry Andric // 19970b57cec5SDimitry Andric // into: 19980b57cec5SDimitry Andric // 19990b57cec5SDimitry Andric // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; 20000b57cec5SDimitry Andric // 20010b57cec5SDimitry Andric // is not safe, because *P may only be valid to access if 'c' is true. 20020b57cec5SDimitry Andric // 20030b57cec5SDimitry Andric // The safety property divides into two parts: 20040b57cec5SDimitry Andric // p1) The memory may not be dereferenceable on entry to the loop. In this 20050b57cec5SDimitry Andric // case, we can't insert the required load in the preheader. 20060b57cec5SDimitry Andric // p2) The memory model does not allow us to insert a store along any dynamic 20070b57cec5SDimitry Andric // path which did not originally have one. 20080b57cec5SDimitry Andric // 20090b57cec5SDimitry Andric // If at least one store is guaranteed to execute, both properties are 20100b57cec5SDimitry Andric // satisfied, and promotion is legal. 20110b57cec5SDimitry Andric // 20120b57cec5SDimitry Andric // This, however, is not a necessary condition. Even if no store/load is 20130b57cec5SDimitry Andric // guaranteed to execute, we can still establish these properties. 20140b57cec5SDimitry Andric // We can establish (p1) by proving that hoisting the load into the preheader 20150b57cec5SDimitry Andric // is safe (i.e. proving dereferenceability on all paths through the loop). We 20160b57cec5SDimitry Andric // can use any access within the alias set to prove dereferenceability, 20170b57cec5SDimitry Andric // since they're all must alias. 20180b57cec5SDimitry Andric // 20190b57cec5SDimitry Andric // There are two ways establish (p2): 20200b57cec5SDimitry Andric // a) Prove the location is thread-local. In this case the memory model 20210b57cec5SDimitry Andric // requirement does not apply, and stores are safe to insert. 
20220b57cec5SDimitry Andric // b) Prove a store dominates every exit block. In this case, if an exit 20230b57cec5SDimitry Andric // blocks is reached, the original dynamic path would have taken us through 20240b57cec5SDimitry Andric // the store, so inserting a store into the exit block is safe. Note that this 20250b57cec5SDimitry Andric // is different from the store being guaranteed to execute. For instance, 20260b57cec5SDimitry Andric // if an exception is thrown on the first iteration of the loop, the original 20270b57cec5SDimitry Andric // store is never executed, but the exit blocks are not executed either. 20280b57cec5SDimitry Andric 20290b57cec5SDimitry Andric bool DereferenceableInPH = false; 203081ad6265SDimitry Andric bool StoreIsGuanteedToExecute = false; 20314824e7fdSDimitry Andric bool FoundLoadToPromote = false; 2032bdd1243dSDimitry Andric // Goes from Unknown to either Safe or Unsafe, but can't switch between them. 2033bdd1243dSDimitry Andric enum { 2034bdd1243dSDimitry Andric StoreSafe, 2035bdd1243dSDimitry Andric StoreUnsafe, 2036bdd1243dSDimitry Andric StoreSafetyUnknown, 2037bdd1243dSDimitry Andric } StoreSafety = StoreSafetyUnknown; 20380b57cec5SDimitry Andric 20390b57cec5SDimitry Andric SmallVector<Instruction *, 64> LoopUses; 20400b57cec5SDimitry Andric 20410b57cec5SDimitry Andric // We start with an alignment of one and try to find instructions that allow 20420b57cec5SDimitry Andric // us to prove better alignment. 
20435ffd83dbSDimitry Andric Align Alignment; 20440b57cec5SDimitry Andric // Keep track of which types of access we see 20450b57cec5SDimitry Andric bool SawUnorderedAtomic = false; 20460b57cec5SDimitry Andric bool SawNotAtomic = false; 20470b57cec5SDimitry Andric AAMDNodes AATags; 20480b57cec5SDimitry Andric 20490fca6ea1SDimitry Andric const DataLayout &MDL = Preheader->getDataLayout(); 20500b57cec5SDimitry Andric 2051bdd1243dSDimitry Andric // If there are reads outside the promoted set, then promoting stores is 2052bdd1243dSDimitry Andric // definitely not safe. 2053bdd1243dSDimitry Andric if (HasReadsOutsideSet) 2054bdd1243dSDimitry Andric StoreSafety = StoreUnsafe; 2055bdd1243dSDimitry Andric 2056bdd1243dSDimitry Andric if (StoreSafety == StoreSafetyUnknown && SafetyInfo->anyBlockMayThrow()) { 20570b57cec5SDimitry Andric // If a loop can throw, we have to insert a store along each unwind edge. 20580b57cec5SDimitry Andric // That said, we can't actually make the unwind edge explicit. Therefore, 20590b57cec5SDimitry Andric // we have to prove that the store is dead along the unwind edge. We do 20600b57cec5SDimitry Andric // this by proving that the caller can't have a reference to the object 20610b57cec5SDimitry Andric // after return and thus can't possibly load from the object. 2062e8d8bef9SDimitry Andric Value *Object = getUnderlyingObject(SomePtr); 206304eeddc0SDimitry Andric if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT)) 2064bdd1243dSDimitry Andric StoreSafety = StoreUnsafe; 20650b57cec5SDimitry Andric } 20660b57cec5SDimitry Andric 2067bdd1243dSDimitry Andric // Check that all accesses to pointers in the alias set use the same type. 20684824e7fdSDimitry Andric // We cannot (yet) promote a memory location that is loaded and stored in 20690b57cec5SDimitry Andric // different sizes. While we are at it, collect alignment and AA info. 
20704824e7fdSDimitry Andric Type *AccessTy = nullptr; 20710b57cec5SDimitry Andric for (Value *ASIV : PointerMustAliases) { 207281ad6265SDimitry Andric for (Use &U : ASIV->uses()) { 20730b57cec5SDimitry Andric // Ignore instructions that are outside the loop. 207481ad6265SDimitry Andric Instruction *UI = dyn_cast<Instruction>(U.getUser()); 20750b57cec5SDimitry Andric if (!UI || !CurLoop->contains(UI)) 20760b57cec5SDimitry Andric continue; 20770b57cec5SDimitry Andric 20780b57cec5SDimitry Andric // If there is an non-load/store instruction in the loop, we can't promote 20790b57cec5SDimitry Andric // it. 20800b57cec5SDimitry Andric if (LoadInst *Load = dyn_cast<LoadInst>(UI)) { 20810b57cec5SDimitry Andric if (!Load->isUnordered()) 20820b57cec5SDimitry Andric return false; 20830b57cec5SDimitry Andric 20840b57cec5SDimitry Andric SawUnorderedAtomic |= Load->isAtomic(); 20850b57cec5SDimitry Andric SawNotAtomic |= !Load->isAtomic(); 20864824e7fdSDimitry Andric FoundLoadToPromote = true; 20870b57cec5SDimitry Andric 20885ffd83dbSDimitry Andric Align InstAlignment = Load->getAlign(); 20890b57cec5SDimitry Andric 20900b57cec5SDimitry Andric // Note that proving a load safe to speculate requires proving 20910b57cec5SDimitry Andric // sufficient alignment at the target location. Proving it guaranteed 20920b57cec5SDimitry Andric // to execute does as well. Thus we can increase our guaranteed 20930b57cec5SDimitry Andric // alignment as well. 
20940b57cec5SDimitry Andric if (!DereferenceableInPH || (InstAlignment > Alignment)) 2095fb03ea46SDimitry Andric if (isSafeToExecuteUnconditionally( 2096fb03ea46SDimitry Andric *Load, DT, TLI, CurLoop, SafetyInfo, ORE, 2097bdd1243dSDimitry Andric Preheader->getTerminator(), AC, AllowSpeculation)) { 20980b57cec5SDimitry Andric DereferenceableInPH = true; 20990b57cec5SDimitry Andric Alignment = std::max(Alignment, InstAlignment); 21000b57cec5SDimitry Andric } 21010b57cec5SDimitry Andric } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) { 21020b57cec5SDimitry Andric // Stores *of* the pointer are not interesting, only stores *to* the 21030b57cec5SDimitry Andric // pointer. 210481ad6265SDimitry Andric if (U.getOperandNo() != StoreInst::getPointerOperandIndex()) 21050b57cec5SDimitry Andric continue; 21060b57cec5SDimitry Andric if (!Store->isUnordered()) 21070b57cec5SDimitry Andric return false; 21080b57cec5SDimitry Andric 21090b57cec5SDimitry Andric SawUnorderedAtomic |= Store->isAtomic(); 21100b57cec5SDimitry Andric SawNotAtomic |= !Store->isAtomic(); 21110b57cec5SDimitry Andric 21120b57cec5SDimitry Andric // If the store is guaranteed to execute, both properties are satisfied. 21130b57cec5SDimitry Andric // We may want to check if a store is guaranteed to execute even if we 21140b57cec5SDimitry Andric // already know that promotion is safe, since it may have higher 21150b57cec5SDimitry Andric // alignment than any other guaranteed stores, in which case we can 21160b57cec5SDimitry Andric // raise the alignment on the promoted store. 
21175ffd83dbSDimitry Andric Align InstAlignment = Store->getAlign(); 211881ad6265SDimitry Andric bool GuaranteedToExecute = 211981ad6265SDimitry Andric SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop); 212081ad6265SDimitry Andric StoreIsGuanteedToExecute |= GuaranteedToExecute; 212181ad6265SDimitry Andric if (GuaranteedToExecute) { 21220b57cec5SDimitry Andric DereferenceableInPH = true; 2123bdd1243dSDimitry Andric if (StoreSafety == StoreSafetyUnknown) 2124bdd1243dSDimitry Andric StoreSafety = StoreSafe; 21250b57cec5SDimitry Andric Alignment = std::max(Alignment, InstAlignment); 21260b57cec5SDimitry Andric } 21270b57cec5SDimitry Andric 21280b57cec5SDimitry Andric // If a store dominates all exit blocks, it is safe to sink. 21290b57cec5SDimitry Andric // As explained above, if an exit block was executed, a dominating 21300b57cec5SDimitry Andric // store must have been executed at least once, so we are not 21310b57cec5SDimitry Andric // introducing stores on paths that did not have them. 21320b57cec5SDimitry Andric // Note that this only looks at explicit exit blocks. If we ever 21330b57cec5SDimitry Andric // start sinking stores into unwind edges (see above), this will break. 2134bdd1243dSDimitry Andric if (StoreSafety == StoreSafetyUnknown && 2135bdd1243dSDimitry Andric llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) { 21360b57cec5SDimitry Andric return DT->dominates(Store->getParent(), Exit); 2137bdd1243dSDimitry Andric })) 2138bdd1243dSDimitry Andric StoreSafety = StoreSafe; 21390b57cec5SDimitry Andric 21400b57cec5SDimitry Andric // If the store is not guaranteed to execute, we may still get 21410b57cec5SDimitry Andric // deref info through it. 
21420b57cec5SDimitry Andric if (!DereferenceableInPH) { 21430b57cec5SDimitry Andric DereferenceableInPH = isDereferenceableAndAlignedPointer( 21440b57cec5SDimitry Andric Store->getPointerOperand(), Store->getValueOperand()->getType(), 2145bdd1243dSDimitry Andric Store->getAlign(), MDL, Preheader->getTerminator(), AC, DT, TLI); 21460b57cec5SDimitry Andric } 21470b57cec5SDimitry Andric } else 2148bdd1243dSDimitry Andric continue; // Not a load or store. 21490b57cec5SDimitry Andric 21504824e7fdSDimitry Andric if (!AccessTy) 21514824e7fdSDimitry Andric AccessTy = getLoadStoreType(UI); 21524824e7fdSDimitry Andric else if (AccessTy != getLoadStoreType(UI)) 21534824e7fdSDimitry Andric return false; 21544824e7fdSDimitry Andric 21550b57cec5SDimitry Andric // Merge the AA tags. 21560b57cec5SDimitry Andric if (LoopUses.empty()) { 21570b57cec5SDimitry Andric // On the first load/store, just take its AA tags. 2158349cc55cSDimitry Andric AATags = UI->getAAMetadata(); 21590b57cec5SDimitry Andric } else if (AATags) { 2160349cc55cSDimitry Andric AATags = AATags.merge(UI->getAAMetadata()); 21610b57cec5SDimitry Andric } 21620b57cec5SDimitry Andric 21630b57cec5SDimitry Andric LoopUses.push_back(UI); 21640b57cec5SDimitry Andric } 21650b57cec5SDimitry Andric } 21660b57cec5SDimitry Andric 21670b57cec5SDimitry Andric // If we found both an unordered atomic instruction and a non-atomic memory 21680b57cec5SDimitry Andric // access, bail. We can't blindly promote non-atomic to atomic since we 21690b57cec5SDimitry Andric // might not be able to lower the result. We can't downgrade since that 21700b57cec5SDimitry Andric // would violate memory model. Also, align 0 is an error for atomics. 21710b57cec5SDimitry Andric if (SawUnorderedAtomic && SawNotAtomic) 21720b57cec5SDimitry Andric return false; 21730b57cec5SDimitry Andric 21740b57cec5SDimitry Andric // If we're inserting an atomic load in the preheader, we must be able to 21750b57cec5SDimitry Andric // lower it. 
We're only guaranteed to be able to lower naturally aligned 21760b57cec5SDimitry Andric // atomics. 21774824e7fdSDimitry Andric if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(AccessTy)) 21780b57cec5SDimitry Andric return false; 21790b57cec5SDimitry Andric 21800b57cec5SDimitry Andric // If we couldn't prove we can hoist the load, bail. 2181bdd1243dSDimitry Andric if (!DereferenceableInPH) { 2182bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Not promoting: Not dereferenceable in preheader\n"); 21830b57cec5SDimitry Andric return false; 2184bdd1243dSDimitry Andric } 21850b57cec5SDimitry Andric 21860b57cec5SDimitry Andric // We know we can hoist the load, but don't have a guaranteed store. 2187bdd1243dSDimitry Andric // Check whether the location is writable and thread-local. If it is, then we 2188bdd1243dSDimitry Andric // can insert stores along paths which originally didn't have them without 2189bdd1243dSDimitry Andric // violating the memory model. 2190bdd1243dSDimitry Andric if (StoreSafety == StoreSafetyUnknown) { 2191e8d8bef9SDimitry Andric Value *Object = getUnderlyingObject(SomePtr); 21925f757f3fSDimitry Andric bool ExplicitlyDereferenceableOnly; 21935f757f3fSDimitry Andric if (isWritableObject(Object, ExplicitlyDereferenceableOnly) && 21945f757f3fSDimitry Andric (!ExplicitlyDereferenceableOnly || 21955f757f3fSDimitry Andric isDereferenceablePointer(SomePtr, AccessTy, MDL)) && 2196bdd1243dSDimitry Andric isThreadLocalObject(Object, CurLoop, DT, TTI)) 2197bdd1243dSDimitry Andric StoreSafety = StoreSafe; 21980b57cec5SDimitry Andric } 21990b57cec5SDimitry Andric 22004824e7fdSDimitry Andric // If we've still failed to prove we can sink the store, hoist the load 22014824e7fdSDimitry Andric // only, if possible. 2202bdd1243dSDimitry Andric if (StoreSafety != StoreSafe && !FoundLoadToPromote) 22034824e7fdSDimitry Andric // If we cannot hoist the load either, give up. 
22040b57cec5SDimitry Andric return false; 22050b57cec5SDimitry Andric 22064824e7fdSDimitry Andric // Lets do the promotion! 2207bdd1243dSDimitry Andric if (StoreSafety == StoreSafe) { 22084824e7fdSDimitry Andric LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr 22090b57cec5SDimitry Andric << '\n'); 2210bdd1243dSDimitry Andric ++NumLoadStorePromoted; 2211bdd1243dSDimitry Andric } else { 22124824e7fdSDimitry Andric LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr 22134824e7fdSDimitry Andric << '\n'); 2214bdd1243dSDimitry Andric ++NumLoadPromoted; 2215bdd1243dSDimitry Andric } 22164824e7fdSDimitry Andric 22170b57cec5SDimitry Andric ORE->emit([&]() { 22180b57cec5SDimitry Andric return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", 22190b57cec5SDimitry Andric LoopUses[0]) 22200b57cec5SDimitry Andric << "Moving accesses to memory location out of the loop"; 22210b57cec5SDimitry Andric }); 22220b57cec5SDimitry Andric 22235ffd83dbSDimitry Andric // Look at all the loop uses, and try to merge their locations. 222406c3fb27SDimitry Andric std::vector<DILocation *> LoopUsesLocs; 2225bdd1243dSDimitry Andric for (auto *U : LoopUses) 22265ffd83dbSDimitry Andric LoopUsesLocs.push_back(U->getDebugLoc().get()); 22275ffd83dbSDimitry Andric auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs)); 22280b57cec5SDimitry Andric 22290b57cec5SDimitry Andric // We use the SSAUpdater interface to insert phi nodes as required. 
22300b57cec5SDimitry Andric SmallVector<PHINode *, 16> NewPHIs; 22310b57cec5SDimitry Andric SSAUpdater SSA(&NewPHIs); 2232bdd1243dSDimitry Andric LoopPromoter Promoter(SomePtr, LoopUses, SSA, ExitBlocks, InsertPts, 2233bdd1243dSDimitry Andric MSSAInsertPts, PIC, MSSAU, *LI, DL, Alignment, 2234bdd1243dSDimitry Andric SawUnorderedAtomic, AATags, *SafetyInfo, 2235bdd1243dSDimitry Andric StoreSafety == StoreSafe); 22360b57cec5SDimitry Andric 22370b57cec5SDimitry Andric // Set up the preheader to have a definition of the value. It is the live-out 22380b57cec5SDimitry Andric // value from the preheader that uses in the loop will use. 223981ad6265SDimitry Andric LoadInst *PreheaderLoad = nullptr; 224081ad6265SDimitry Andric if (FoundLoadToPromote || !StoreIsGuanteedToExecute) { 224181ad6265SDimitry Andric PreheaderLoad = 224281ad6265SDimitry Andric new LoadInst(AccessTy, SomePtr, SomePtr->getName() + ".promoted", 22430fca6ea1SDimitry Andric Preheader->getTerminator()->getIterator()); 22440b57cec5SDimitry Andric if (SawUnorderedAtomic) 22450b57cec5SDimitry Andric PreheaderLoad->setOrdering(AtomicOrdering::Unordered); 22465ffd83dbSDimitry Andric PreheaderLoad->setAlignment(Alignment); 22475ffd83dbSDimitry Andric PreheaderLoad->setDebugLoc(DebugLoc()); 22480b57cec5SDimitry Andric if (AATags) 22490b57cec5SDimitry Andric PreheaderLoad->setAAMetadata(AATags); 22500b57cec5SDimitry Andric 225181ad6265SDimitry Andric MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB( 22520b57cec5SDimitry Andric PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End); 22530b57cec5SDimitry Andric MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess); 225481ad6265SDimitry Andric MSSAU.insertUse(NewMemUse, /*RenameUses=*/true); 225581ad6265SDimitry Andric SSA.AddAvailableValue(Preheader, PreheaderLoad); 225681ad6265SDimitry Andric } else { 225781ad6265SDimitry Andric SSA.AddAvailableValue(Preheader, PoisonValue::get(AccessTy)); 225881ad6265SDimitry Andric } 
22590b57cec5SDimitry Andric 2260349cc55cSDimitry Andric if (VerifyMemorySSA) 226181ad6265SDimitry Andric MSSAU.getMemorySSA()->verifyMemorySSA(); 22620b57cec5SDimitry Andric // Rewrite all the loads in the loop and remember all the definitions from 22630b57cec5SDimitry Andric // stores in the loop. 22640b57cec5SDimitry Andric Promoter.run(LoopUses); 22650b57cec5SDimitry Andric 2266349cc55cSDimitry Andric if (VerifyMemorySSA) 226781ad6265SDimitry Andric MSSAU.getMemorySSA()->verifyMemorySSA(); 22680b57cec5SDimitry Andric // If the SSAUpdater didn't use the load in the preheader, just zap it now. 226981ad6265SDimitry Andric if (PreheaderLoad && PreheaderLoad->use_empty()) 2270349cc55cSDimitry Andric eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU); 22710b57cec5SDimitry Andric 22720b57cec5SDimitry Andric return true; 22730b57cec5SDimitry Andric } 22740b57cec5SDimitry Andric 2275fe6060f1SDimitry Andric static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L, 2276fe6060f1SDimitry Andric function_ref<void(Instruction *)> Fn) { 2277fe6060f1SDimitry Andric for (const BasicBlock *BB : L->blocks()) 2278fe6060f1SDimitry Andric if (const auto *Accesses = MSSA->getBlockAccesses(BB)) 2279fe6060f1SDimitry Andric for (const auto &Access : *Accesses) 2280fe6060f1SDimitry Andric if (const auto *MUD = dyn_cast<MemoryUseOrDef>(&Access)) 2281fe6060f1SDimitry Andric Fn(MUD->getMemoryInst()); 2282fe6060f1SDimitry Andric } 2283fe6060f1SDimitry Andric 2284bdd1243dSDimitry Andric // The bool indicates whether there might be reads outside the set, in which 2285bdd1243dSDimitry Andric // case only loads may be promoted. 
2286bdd1243dSDimitry Andric static SmallVector<PointersAndHasReadsOutsideSet, 0> 2287fe6060f1SDimitry Andric collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) { 2288bdd1243dSDimitry Andric BatchAAResults BatchAA(*AA); 2289bdd1243dSDimitry Andric AliasSetTracker AST(BatchAA); 2290fe6060f1SDimitry Andric 2291fe6060f1SDimitry Andric auto IsPotentiallyPromotable = [L](const Instruction *I) { 2292fe6060f1SDimitry Andric if (const auto *SI = dyn_cast<StoreInst>(I)) 2293fe6060f1SDimitry Andric return L->isLoopInvariant(SI->getPointerOperand()); 2294fe6060f1SDimitry Andric if (const auto *LI = dyn_cast<LoadInst>(I)) 2295fe6060f1SDimitry Andric return L->isLoopInvariant(LI->getPointerOperand()); 2296fe6060f1SDimitry Andric return false; 2297fe6060f1SDimitry Andric }; 2298fe6060f1SDimitry Andric 229981ad6265SDimitry Andric // Populate AST with potentially promotable accesses. 2300fe6060f1SDimitry Andric SmallPtrSet<Value *, 16> AttemptingPromotion; 2301fe6060f1SDimitry Andric foreachMemoryAccess(MSSA, L, [&](Instruction *I) { 2302fe6060f1SDimitry Andric if (IsPotentiallyPromotable(I)) { 2303fe6060f1SDimitry Andric AttemptingPromotion.insert(I); 2304fe6060f1SDimitry Andric AST.add(I); 2305fe6060f1SDimitry Andric } 2306fe6060f1SDimitry Andric }); 2307fe6060f1SDimitry Andric 2308fe6060f1SDimitry Andric // We're only interested in must-alias sets that contain a mod. 2309bdd1243dSDimitry Andric SmallVector<PointerIntPair<const AliasSet *, 1, bool>, 8> Sets; 2310fe6060f1SDimitry Andric for (AliasSet &AS : AST) 2311fe6060f1SDimitry Andric if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias()) 2312bdd1243dSDimitry Andric Sets.push_back({&AS, false}); 2313fe6060f1SDimitry Andric 2314fe6060f1SDimitry Andric if (Sets.empty()) 2315fe6060f1SDimitry Andric return {}; // Nothing to promote... 2316fe6060f1SDimitry Andric 2317fe6060f1SDimitry Andric // Discard any sets for which there is an aliasing non-promotable access. 
2318fe6060f1SDimitry Andric foreachMemoryAccess(MSSA, L, [&](Instruction *I) { 2319fe6060f1SDimitry Andric if (AttemptingPromotion.contains(I)) 2320fe6060f1SDimitry Andric return; 2321fe6060f1SDimitry Andric 2322bdd1243dSDimitry Andric llvm::erase_if(Sets, [&](PointerIntPair<const AliasSet *, 1, bool> &Pair) { 2323bdd1243dSDimitry Andric ModRefInfo MR = Pair.getPointer()->aliasesUnknownInst(I, BatchAA); 2324bdd1243dSDimitry Andric // Cannot promote if there are writes outside the set. 2325bdd1243dSDimitry Andric if (isModSet(MR)) 2326bdd1243dSDimitry Andric return true; 2327bdd1243dSDimitry Andric if (isRefSet(MR)) { 2328bdd1243dSDimitry Andric // Remember reads outside the set. 2329bdd1243dSDimitry Andric Pair.setInt(true); 2330bdd1243dSDimitry Andric // If this is a mod-only set and there are reads outside the set, 2331bdd1243dSDimitry Andric // we will not be able to promote, so bail out early. 2332bdd1243dSDimitry Andric return !Pair.getPointer()->isRef(); 2333bdd1243dSDimitry Andric } 2334bdd1243dSDimitry Andric return false; 2335fe6060f1SDimitry Andric }); 2336fe6060f1SDimitry Andric }); 2337fe6060f1SDimitry Andric 2338bdd1243dSDimitry Andric SmallVector<std::pair<SmallSetVector<Value *, 8>, bool>, 0> Result; 2339bdd1243dSDimitry Andric for (auto [Set, HasReadsOutsideSet] : Sets) { 2340fe6060f1SDimitry Andric SmallSetVector<Value *, 8> PointerMustAliases; 23417a6dacacSDimitry Andric for (const auto &MemLoc : *Set) 23427a6dacacSDimitry Andric PointerMustAliases.insert(const_cast<Value *>(MemLoc.Ptr)); 2343bdd1243dSDimitry Andric Result.emplace_back(std::move(PointerMustAliases), HasReadsOutsideSet); 2344fe6060f1SDimitry Andric } 2345fe6060f1SDimitry Andric 2346fe6060f1SDimitry Andric return Result; 2347fe6060f1SDimitry Andric } 2348fe6060f1SDimitry Andric 234981ad6265SDimitry Andric static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU, 2350e8d8bef9SDimitry Andric Loop *CurLoop, Instruction &I, 235106c3fb27SDimitry Andric SinkAndHoistLICMFlags 
&Flags, 235206c3fb27SDimitry Andric bool InvariantGroup) { 23530b57cec5SDimitry Andric // For hoisting, use the walker to determine safety 2354e8d8bef9SDimitry Andric if (!Flags.getIsSink()) { 235506c3fb27SDimitry Andric // If hoisting an invariant group, we only need to check that there 235606c3fb27SDimitry Andric // is no store to the loaded pointer between the start of the loop, 235706c3fb27SDimitry Andric // and the load (since all values must be the same). 235806c3fb27SDimitry Andric 235906c3fb27SDimitry Andric // This can be checked in two conditions: 236006c3fb27SDimitry Andric // 1) if the memoryaccess is outside the loop 236106c3fb27SDimitry Andric // 2) the earliest access is at the loop header, 236206c3fb27SDimitry Andric // if the memory loaded is the phi node 236306c3fb27SDimitry Andric 236406c3fb27SDimitry Andric BatchAAResults BAA(MSSA->getAA()); 236506c3fb27SDimitry Andric MemoryAccess *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, MU); 23660b57cec5SDimitry Andric return !MSSA->isLiveOnEntryDef(Source) && 236706c3fb27SDimitry Andric CurLoop->contains(Source->getBlock()) && 236806c3fb27SDimitry Andric !(InvariantGroup && Source->getBlock() == CurLoop->getHeader() && isa<MemoryPhi>(Source)); 23690b57cec5SDimitry Andric } 23700b57cec5SDimitry Andric 23710b57cec5SDimitry Andric // For sinking, we'd need to check all Defs below this use. The getClobbering 23720b57cec5SDimitry Andric // call will look on the backedge of the loop, but will check aliasing with 23730b57cec5SDimitry Andric // the instructions on the previous iteration. 23740b57cec5SDimitry Andric // For example: 23750b57cec5SDimitry Andric // for (i ... ) 23760b57cec5SDimitry Andric // load a[i] ( Use (LoE) 23770b57cec5SDimitry Andric // store a[i] ( 1 = Def (2), with 2 = Phi for the loop. 
23780b57cec5SDimitry Andric // i++; 23790b57cec5SDimitry Andric // The load sees no clobbering inside the loop, as the backedge alias check 23800b57cec5SDimitry Andric // does phi translation, and will check aliasing against store a[i-1]. 23810b57cec5SDimitry Andric // However sinking the load outside the loop, below the store is incorrect. 23820b57cec5SDimitry Andric 23830b57cec5SDimitry Andric // For now, only sink if there are no Defs in the loop, and the existing ones 23840b57cec5SDimitry Andric // precede the use and are in the same block. 23850b57cec5SDimitry Andric // FIXME: Increase precision: Safe to sink if Use post dominates the Def; 23860b57cec5SDimitry Andric // needs PostDominatorTreeAnalysis. 23870b57cec5SDimitry Andric // FIXME: More precise: no Defs that alias this Use. 2388e8d8bef9SDimitry Andric if (Flags.tooManyMemoryAccesses()) 23890b57cec5SDimitry Andric return true; 23900b57cec5SDimitry Andric for (auto *BB : CurLoop->getBlocks()) 239181ad6265SDimitry Andric if (pointerInvalidatedByBlock(*BB, *MSSA, *MU)) 2392e8d8bef9SDimitry Andric return true; 2393e8d8bef9SDimitry Andric // When sinking, the source block may not be part of the loop so check it. 
2394e8d8bef9SDimitry Andric if (!CurLoop->contains(&I)) 239581ad6265SDimitry Andric return pointerInvalidatedByBlock(*I.getParent(), *MSSA, *MU); 2396e8d8bef9SDimitry Andric 2397e8d8bef9SDimitry Andric return false; 2398e8d8bef9SDimitry Andric } 2399e8d8bef9SDimitry Andric 240081ad6265SDimitry Andric bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) { 2401e8d8bef9SDimitry Andric if (const auto *Accesses = MSSA.getBlockDefs(&BB)) 24020b57cec5SDimitry Andric for (const auto &MA : *Accesses) 24030b57cec5SDimitry Andric if (const auto *MD = dyn_cast<MemoryDef>(&MA)) 2404e8d8bef9SDimitry Andric if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU)) 24050b57cec5SDimitry Andric return true; 24060b57cec5SDimitry Andric return false; 24070b57cec5SDimitry Andric } 24080b57cec5SDimitry Andric 240906c3fb27SDimitry Andric /// Try to simplify things like (A < INV_1 AND icmp A < INV_2) into (A < 241006c3fb27SDimitry Andric /// min(INV_1, INV_2)), if INV_1 and INV_2 are both loop invariants and their 241106c3fb27SDimitry Andric /// minimun can be computed outside of loop, and X is not a loop-invariant. 
241206c3fb27SDimitry Andric static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, 241306c3fb27SDimitry Andric MemorySSAUpdater &MSSAU) { 241406c3fb27SDimitry Andric bool Inverse = false; 241506c3fb27SDimitry Andric using namespace PatternMatch; 241606c3fb27SDimitry Andric Value *Cond1, *Cond2; 241706c3fb27SDimitry Andric if (match(&I, m_LogicalOr(m_Value(Cond1), m_Value(Cond2)))) { 241806c3fb27SDimitry Andric Inverse = true; 241906c3fb27SDimitry Andric } else if (match(&I, m_LogicalAnd(m_Value(Cond1), m_Value(Cond2)))) { 242006c3fb27SDimitry Andric // Do nothing 242106c3fb27SDimitry Andric } else 242206c3fb27SDimitry Andric return false; 242306c3fb27SDimitry Andric 242406c3fb27SDimitry Andric auto MatchICmpAgainstInvariant = [&](Value *C, ICmpInst::Predicate &P, 242506c3fb27SDimitry Andric Value *&LHS, Value *&RHS) { 242606c3fb27SDimitry Andric if (!match(C, m_OneUse(m_ICmp(P, m_Value(LHS), m_Value(RHS))))) 242706c3fb27SDimitry Andric return false; 242806c3fb27SDimitry Andric if (!LHS->getType()->isIntegerTy()) 242906c3fb27SDimitry Andric return false; 243006c3fb27SDimitry Andric if (!ICmpInst::isRelational(P)) 243106c3fb27SDimitry Andric return false; 243206c3fb27SDimitry Andric if (L.isLoopInvariant(LHS)) { 243306c3fb27SDimitry Andric std::swap(LHS, RHS); 243406c3fb27SDimitry Andric P = ICmpInst::getSwappedPredicate(P); 243506c3fb27SDimitry Andric } 243606c3fb27SDimitry Andric if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS)) 243706c3fb27SDimitry Andric return false; 243806c3fb27SDimitry Andric if (Inverse) 243906c3fb27SDimitry Andric P = ICmpInst::getInversePredicate(P); 244006c3fb27SDimitry Andric return true; 244106c3fb27SDimitry Andric }; 244206c3fb27SDimitry Andric ICmpInst::Predicate P1, P2; 244306c3fb27SDimitry Andric Value *LHS1, *LHS2, *RHS1, *RHS2; 244406c3fb27SDimitry Andric if (!MatchICmpAgainstInvariant(Cond1, P1, LHS1, RHS1) || 244506c3fb27SDimitry Andric !MatchICmpAgainstInvariant(Cond2, P2, LHS2, RHS2)) 
244606c3fb27SDimitry Andric return false; 244706c3fb27SDimitry Andric if (P1 != P2 || LHS1 != LHS2) 244806c3fb27SDimitry Andric return false; 244906c3fb27SDimitry Andric 245006c3fb27SDimitry Andric // Everything is fine, we can do the transform. 245106c3fb27SDimitry Andric bool UseMin = ICmpInst::isLT(P1) || ICmpInst::isLE(P1); 245206c3fb27SDimitry Andric assert( 245306c3fb27SDimitry Andric (UseMin || ICmpInst::isGT(P1) || ICmpInst::isGE(P1)) && 245406c3fb27SDimitry Andric "Relational predicate is either less (or equal) or greater (or equal)!"); 245506c3fb27SDimitry Andric Intrinsic::ID id = ICmpInst::isSigned(P1) 245606c3fb27SDimitry Andric ? (UseMin ? Intrinsic::smin : Intrinsic::smax) 245706c3fb27SDimitry Andric : (UseMin ? Intrinsic::umin : Intrinsic::umax); 245806c3fb27SDimitry Andric auto *Preheader = L.getLoopPreheader(); 245906c3fb27SDimitry Andric assert(Preheader && "Loop is not in simplify form?"); 246006c3fb27SDimitry Andric IRBuilder<> Builder(Preheader->getTerminator()); 246106c3fb27SDimitry Andric // We are about to create a new guaranteed use for RHS2 which might not exist 246206c3fb27SDimitry Andric // before (if it was a non-taken input of logical and/or instruction). If it 246306c3fb27SDimitry Andric // was poison, we need to freeze it. Note that no new use for LHS and RHS1 are 246406c3fb27SDimitry Andric // introduced, so they don't need this. 246506c3fb27SDimitry Andric if (isa<SelectInst>(I)) 246606c3fb27SDimitry Andric RHS2 = Builder.CreateFreeze(RHS2, RHS2->getName() + ".fr"); 246706c3fb27SDimitry Andric Value *NewRHS = Builder.CreateBinaryIntrinsic( 246806c3fb27SDimitry Andric id, RHS1, RHS2, nullptr, StringRef("invariant.") + 246906c3fb27SDimitry Andric (ICmpInst::isSigned(P1) ? "s" : "u") + 247006c3fb27SDimitry Andric (UseMin ? 
"min" : "max")); 247106c3fb27SDimitry Andric Builder.SetInsertPoint(&I); 247206c3fb27SDimitry Andric ICmpInst::Predicate P = P1; 247306c3fb27SDimitry Andric if (Inverse) 247406c3fb27SDimitry Andric P = ICmpInst::getInversePredicate(P); 247506c3fb27SDimitry Andric Value *NewCond = Builder.CreateICmp(P, LHS1, NewRHS); 247606c3fb27SDimitry Andric NewCond->takeName(&I); 247706c3fb27SDimitry Andric I.replaceAllUsesWith(NewCond); 247806c3fb27SDimitry Andric eraseInstruction(I, SafetyInfo, MSSAU); 247906c3fb27SDimitry Andric eraseInstruction(*cast<Instruction>(Cond1), SafetyInfo, MSSAU); 248006c3fb27SDimitry Andric eraseInstruction(*cast<Instruction>(Cond2), SafetyInfo, MSSAU); 248106c3fb27SDimitry Andric return true; 248206c3fb27SDimitry Andric } 248306c3fb27SDimitry Andric 248406c3fb27SDimitry Andric /// Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1 if 248506c3fb27SDimitry Andric /// this allows hoisting the inner GEP. 248606c3fb27SDimitry Andric static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, 248706c3fb27SDimitry Andric MemorySSAUpdater &MSSAU, AssumptionCache *AC, 248806c3fb27SDimitry Andric DominatorTree *DT) { 248906c3fb27SDimitry Andric auto *GEP = dyn_cast<GetElementPtrInst>(&I); 249006c3fb27SDimitry Andric if (!GEP) 249106c3fb27SDimitry Andric return false; 249206c3fb27SDimitry Andric 249306c3fb27SDimitry Andric auto *Src = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand()); 249406c3fb27SDimitry Andric if (!Src || !Src->hasOneUse() || !L.contains(Src)) 249506c3fb27SDimitry Andric return false; 249606c3fb27SDimitry Andric 249706c3fb27SDimitry Andric Value *SrcPtr = Src->getPointerOperand(); 249806c3fb27SDimitry Andric auto LoopInvariant = [&](Value *V) { return L.isLoopInvariant(V); }; 249906c3fb27SDimitry Andric if (!L.isLoopInvariant(SrcPtr) || !all_of(GEP->indices(), LoopInvariant)) 250006c3fb27SDimitry Andric return false; 250106c3fb27SDimitry Andric 250206c3fb27SDimitry Andric // This can only happen if 
!AllowSpeculation, otherwise this would already be 250306c3fb27SDimitry Andric // handled. 250406c3fb27SDimitry Andric // FIXME: Should we respect AllowSpeculation in these reassociation folds? 250506c3fb27SDimitry Andric // The flag exists to prevent metadata dropping, which is not relevant here. 250606c3fb27SDimitry Andric if (all_of(Src->indices(), LoopInvariant)) 250706c3fb27SDimitry Andric return false; 250806c3fb27SDimitry Andric 250906c3fb27SDimitry Andric // The swapped GEPs are inbounds if both original GEPs are inbounds 251006c3fb27SDimitry Andric // and the sign of the offsets is the same. For simplicity, only 251106c3fb27SDimitry Andric // handle both offsets being non-negative. 25120fca6ea1SDimitry Andric const DataLayout &DL = GEP->getDataLayout(); 251306c3fb27SDimitry Andric auto NonNegative = [&](Value *V) { 25145f757f3fSDimitry Andric return isKnownNonNegative(V, SimplifyQuery(DL, DT, AC, GEP)); 251506c3fb27SDimitry Andric }; 251606c3fb27SDimitry Andric bool IsInBounds = Src->isInBounds() && GEP->isInBounds() && 251706c3fb27SDimitry Andric all_of(Src->indices(), NonNegative) && 251806c3fb27SDimitry Andric all_of(GEP->indices(), NonNegative); 251906c3fb27SDimitry Andric 252006c3fb27SDimitry Andric BasicBlock *Preheader = L.getLoopPreheader(); 252106c3fb27SDimitry Andric IRBuilder<> Builder(Preheader->getTerminator()); 252206c3fb27SDimitry Andric Value *NewSrc = Builder.CreateGEP(GEP->getSourceElementType(), SrcPtr, 252306c3fb27SDimitry Andric SmallVector<Value *>(GEP->indices()), 252406c3fb27SDimitry Andric "invariant.gep", IsInBounds); 252506c3fb27SDimitry Andric Builder.SetInsertPoint(GEP); 252606c3fb27SDimitry Andric Value *NewGEP = Builder.CreateGEP(Src->getSourceElementType(), NewSrc, 252706c3fb27SDimitry Andric SmallVector<Value *>(Src->indices()), "gep", 252806c3fb27SDimitry Andric IsInBounds); 252906c3fb27SDimitry Andric GEP->replaceAllUsesWith(NewGEP); 253006c3fb27SDimitry Andric eraseInstruction(*GEP, SafetyInfo, MSSAU); 
253106c3fb27SDimitry Andric eraseInstruction(*Src, SafetyInfo, MSSAU); 253206c3fb27SDimitry Andric return true; 253306c3fb27SDimitry Andric } 253406c3fb27SDimitry Andric 253506c3fb27SDimitry Andric /// Try to turn things like "LV + C1 < C2" into "LV < C2 - C1". Here 253606c3fb27SDimitry Andric /// C1 and C2 are loop invariants and LV is a loop-variant. 253706c3fb27SDimitry Andric static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS, 253806c3fb27SDimitry Andric Value *InvariantRHS, ICmpInst &ICmp, Loop &L, 253906c3fb27SDimitry Andric ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, 254006c3fb27SDimitry Andric AssumptionCache *AC, DominatorTree *DT) { 254106c3fb27SDimitry Andric assert(ICmpInst::isSigned(Pred) && "Not supported yet!"); 254206c3fb27SDimitry Andric assert(!L.isLoopInvariant(VariantLHS) && "Precondition."); 254306c3fb27SDimitry Andric assert(L.isLoopInvariant(InvariantRHS) && "Precondition."); 254406c3fb27SDimitry Andric 254506c3fb27SDimitry Andric // Try to represent VariantLHS as sum of invariant and variant operands. 254606c3fb27SDimitry Andric using namespace PatternMatch; 254706c3fb27SDimitry Andric Value *VariantOp, *InvariantOp; 254806c3fb27SDimitry Andric if (!match(VariantLHS, m_NSWAdd(m_Value(VariantOp), m_Value(InvariantOp)))) 254906c3fb27SDimitry Andric return false; 255006c3fb27SDimitry Andric 255106c3fb27SDimitry Andric // LHS itself is a loop-variant, try to represent it in the form: 255206c3fb27SDimitry Andric // "VariantOp + InvariantOp". If it is possible, then we can reassociate. 
255306c3fb27SDimitry Andric if (L.isLoopInvariant(VariantOp)) 255406c3fb27SDimitry Andric std::swap(VariantOp, InvariantOp); 255506c3fb27SDimitry Andric if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp)) 255606c3fb27SDimitry Andric return false; 255706c3fb27SDimitry Andric 255806c3fb27SDimitry Andric // In order to turn "LV + C1 < C2" into "LV < C2 - C1", we need to be able to 255906c3fb27SDimitry Andric // freely move values from left side of inequality to right side (just as in 256006c3fb27SDimitry Andric // normal linear arithmetics). Overflows make things much more complicated, so 256106c3fb27SDimitry Andric // we want to avoid this. 25620fca6ea1SDimitry Andric auto &DL = L.getHeader()->getDataLayout(); 256306c3fb27SDimitry Andric bool ProvedNoOverflowAfterReassociate = 25645f757f3fSDimitry Andric computeOverflowForSignedSub(InvariantRHS, InvariantOp, 25655f757f3fSDimitry Andric SimplifyQuery(DL, DT, AC, &ICmp)) == 25665f757f3fSDimitry Andric llvm::OverflowResult::NeverOverflows; 256706c3fb27SDimitry Andric if (!ProvedNoOverflowAfterReassociate) 256806c3fb27SDimitry Andric return false; 256906c3fb27SDimitry Andric auto *Preheader = L.getLoopPreheader(); 257006c3fb27SDimitry Andric assert(Preheader && "Loop is not in simplify form?"); 257106c3fb27SDimitry Andric IRBuilder<> Builder(Preheader->getTerminator()); 257206c3fb27SDimitry Andric Value *NewCmpOp = Builder.CreateSub(InvariantRHS, InvariantOp, "invariant.op", 257306c3fb27SDimitry Andric /*HasNUW*/ false, /*HasNSW*/ true); 257406c3fb27SDimitry Andric ICmp.setPredicate(Pred); 257506c3fb27SDimitry Andric ICmp.setOperand(0, VariantOp); 257606c3fb27SDimitry Andric ICmp.setOperand(1, NewCmpOp); 257706c3fb27SDimitry Andric eraseInstruction(cast<Instruction>(*VariantLHS), SafetyInfo, MSSAU); 257806c3fb27SDimitry Andric return true; 257906c3fb27SDimitry Andric } 258006c3fb27SDimitry Andric 258106c3fb27SDimitry Andric /// Try to reassociate and hoist the following two patterns: 258206c3fb27SDimitry 
Andric /// LV - C1 < C2 --> LV < C1 + C2, 258306c3fb27SDimitry Andric /// C1 - LV < C2 --> LV > C1 - C2. 258406c3fb27SDimitry Andric static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS, 258506c3fb27SDimitry Andric Value *InvariantRHS, ICmpInst &ICmp, Loop &L, 258606c3fb27SDimitry Andric ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, 258706c3fb27SDimitry Andric AssumptionCache *AC, DominatorTree *DT) { 258806c3fb27SDimitry Andric assert(ICmpInst::isSigned(Pred) && "Not supported yet!"); 258906c3fb27SDimitry Andric assert(!L.isLoopInvariant(VariantLHS) && "Precondition."); 259006c3fb27SDimitry Andric assert(L.isLoopInvariant(InvariantRHS) && "Precondition."); 259106c3fb27SDimitry Andric 259206c3fb27SDimitry Andric // Try to represent VariantLHS as sum of invariant and variant operands. 259306c3fb27SDimitry Andric using namespace PatternMatch; 259406c3fb27SDimitry Andric Value *VariantOp, *InvariantOp; 259506c3fb27SDimitry Andric if (!match(VariantLHS, m_NSWSub(m_Value(VariantOp), m_Value(InvariantOp)))) 259606c3fb27SDimitry Andric return false; 259706c3fb27SDimitry Andric 259806c3fb27SDimitry Andric bool VariantSubtracted = false; 259906c3fb27SDimitry Andric // LHS itself is a loop-variant, try to represent it in the form: 260006c3fb27SDimitry Andric // "VariantOp + InvariantOp". If it is possible, then we can reassociate. If 260106c3fb27SDimitry Andric // the variant operand goes with minus, we use a slightly different scheme. 
260206c3fb27SDimitry Andric if (L.isLoopInvariant(VariantOp)) { 260306c3fb27SDimitry Andric std::swap(VariantOp, InvariantOp); 260406c3fb27SDimitry Andric VariantSubtracted = true; 260506c3fb27SDimitry Andric Pred = ICmpInst::getSwappedPredicate(Pred); 260606c3fb27SDimitry Andric } 260706c3fb27SDimitry Andric if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp)) 260806c3fb27SDimitry Andric return false; 260906c3fb27SDimitry Andric 261006c3fb27SDimitry Andric // In order to turn "LV - C1 < C2" into "LV < C2 + C1", we need to be able to 261106c3fb27SDimitry Andric // freely move values from left side of inequality to right side (just as in 261206c3fb27SDimitry Andric // normal linear arithmetics). Overflows make things much more complicated, so 261306c3fb27SDimitry Andric // we want to avoid this. Likewise, for "C1 - LV < C2" we need to prove that 261406c3fb27SDimitry Andric // "C1 - C2" does not overflow. 26150fca6ea1SDimitry Andric auto &DL = L.getHeader()->getDataLayout(); 26165f757f3fSDimitry Andric SimplifyQuery SQ(DL, DT, AC, &ICmp); 261706c3fb27SDimitry Andric if (VariantSubtracted) { 261806c3fb27SDimitry Andric // C1 - LV < C2 --> LV > C1 - C2 26195f757f3fSDimitry Andric if (computeOverflowForSignedSub(InvariantOp, InvariantRHS, SQ) != 26205f757f3fSDimitry Andric llvm::OverflowResult::NeverOverflows) 262106c3fb27SDimitry Andric return false; 262206c3fb27SDimitry Andric } else { 262306c3fb27SDimitry Andric // LV - C1 < C2 --> LV < C1 + C2 26245f757f3fSDimitry Andric if (computeOverflowForSignedAdd(InvariantOp, InvariantRHS, SQ) != 26255f757f3fSDimitry Andric llvm::OverflowResult::NeverOverflows) 262606c3fb27SDimitry Andric return false; 262706c3fb27SDimitry Andric } 262806c3fb27SDimitry Andric auto *Preheader = L.getLoopPreheader(); 262906c3fb27SDimitry Andric assert(Preheader && "Loop is not in simplify form?"); 263006c3fb27SDimitry Andric IRBuilder<> Builder(Preheader->getTerminator()); 263106c3fb27SDimitry Andric Value *NewCmpOp = 
263206c3fb27SDimitry Andric VariantSubtracted 263306c3fb27SDimitry Andric ? Builder.CreateSub(InvariantOp, InvariantRHS, "invariant.op", 263406c3fb27SDimitry Andric /*HasNUW*/ false, /*HasNSW*/ true) 263506c3fb27SDimitry Andric : Builder.CreateAdd(InvariantOp, InvariantRHS, "invariant.op", 263606c3fb27SDimitry Andric /*HasNUW*/ false, /*HasNSW*/ true); 263706c3fb27SDimitry Andric ICmp.setPredicate(Pred); 263806c3fb27SDimitry Andric ICmp.setOperand(0, VariantOp); 263906c3fb27SDimitry Andric ICmp.setOperand(1, NewCmpOp); 264006c3fb27SDimitry Andric eraseInstruction(cast<Instruction>(*VariantLHS), SafetyInfo, MSSAU); 264106c3fb27SDimitry Andric return true; 264206c3fb27SDimitry Andric } 264306c3fb27SDimitry Andric 264406c3fb27SDimitry Andric /// Reassociate and hoist add/sub expressions. 264506c3fb27SDimitry Andric static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, 264606c3fb27SDimitry Andric MemorySSAUpdater &MSSAU, AssumptionCache *AC, 264706c3fb27SDimitry Andric DominatorTree *DT) { 264806c3fb27SDimitry Andric using namespace PatternMatch; 264906c3fb27SDimitry Andric ICmpInst::Predicate Pred; 265006c3fb27SDimitry Andric Value *LHS, *RHS; 265106c3fb27SDimitry Andric if (!match(&I, m_ICmp(Pred, m_Value(LHS), m_Value(RHS)))) 265206c3fb27SDimitry Andric return false; 265306c3fb27SDimitry Andric 265406c3fb27SDimitry Andric // TODO: Support unsigned predicates? 265506c3fb27SDimitry Andric if (!ICmpInst::isSigned(Pred)) 265606c3fb27SDimitry Andric return false; 265706c3fb27SDimitry Andric 265806c3fb27SDimitry Andric // Put variant operand to LHS position. 265906c3fb27SDimitry Andric if (L.isLoopInvariant(LHS)) { 266006c3fb27SDimitry Andric std::swap(LHS, RHS); 266106c3fb27SDimitry Andric Pred = ICmpInst::getSwappedPredicate(Pred); 266206c3fb27SDimitry Andric } 266306c3fb27SDimitry Andric // We want to delete the initial operation after reassociation, so only do it 266406c3fb27SDimitry Andric // if it has no other uses. 
266506c3fb27SDimitry Andric if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS) || !LHS->hasOneUse()) 266606c3fb27SDimitry Andric return false; 266706c3fb27SDimitry Andric 266806c3fb27SDimitry Andric // TODO: We could go with smarter context, taking common dominator of all I's 266906c3fb27SDimitry Andric // users instead of I itself. 267006c3fb27SDimitry Andric if (hoistAdd(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT)) 267106c3fb27SDimitry Andric return true; 267206c3fb27SDimitry Andric 267306c3fb27SDimitry Andric if (hoistSub(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT)) 267406c3fb27SDimitry Andric return true; 267506c3fb27SDimitry Andric 267606c3fb27SDimitry Andric return false; 267706c3fb27SDimitry Andric } 267806c3fb27SDimitry Andric 26790fca6ea1SDimitry Andric static bool isReassociableOp(Instruction *I, unsigned IntOpcode, 26800fca6ea1SDimitry Andric unsigned FPOpcode) { 26810fca6ea1SDimitry Andric if (I->getOpcode() == IntOpcode) 26820fca6ea1SDimitry Andric return true; 26830fca6ea1SDimitry Andric if (I->getOpcode() == FPOpcode && I->hasAllowReassoc() && 26840fca6ea1SDimitry Andric I->hasNoSignedZeros()) 26850fca6ea1SDimitry Andric return true; 26860fca6ea1SDimitry Andric return false; 26870fca6ea1SDimitry Andric } 26880fca6ea1SDimitry Andric 26895f757f3fSDimitry Andric /// Try to reassociate expressions like ((A1 * B1) + (A2 * B2) + ...) * C where 26905f757f3fSDimitry Andric /// A1, A2, ... and C are loop invariants into expressions like 26915f757f3fSDimitry Andric /// ((A1 * C * B1) + (A2 * C * B2) + ...) and hoist the (A1 * C), (A2 * C), ... 26925f757f3fSDimitry Andric /// invariant expressions. This functions returns true only if any hoisting has 26935f757f3fSDimitry Andric /// actually occured. 
26940fca6ea1SDimitry Andric static bool hoistMulAddAssociation(Instruction &I, Loop &L, 26955f757f3fSDimitry Andric ICFLoopSafetyInfo &SafetyInfo, 26965f757f3fSDimitry Andric MemorySSAUpdater &MSSAU, AssumptionCache *AC, 26975f757f3fSDimitry Andric DominatorTree *DT) { 26980fca6ea1SDimitry Andric if (!isReassociableOp(&I, Instruction::Mul, Instruction::FMul)) 26995f757f3fSDimitry Andric return false; 27000fca6ea1SDimitry Andric Value *VariantOp = I.getOperand(0); 27010fca6ea1SDimitry Andric Value *InvariantOp = I.getOperand(1); 27025f757f3fSDimitry Andric if (L.isLoopInvariant(VariantOp)) 27035f757f3fSDimitry Andric std::swap(VariantOp, InvariantOp); 27045f757f3fSDimitry Andric if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp)) 27055f757f3fSDimitry Andric return false; 27065f757f3fSDimitry Andric Value *Factor = InvariantOp; 27075f757f3fSDimitry Andric 27085f757f3fSDimitry Andric // First, we need to make sure we should do the transformation. 27095f757f3fSDimitry Andric SmallVector<Use *> Changes; 27100fca6ea1SDimitry Andric SmallVector<BinaryOperator *> Adds; 27115f757f3fSDimitry Andric SmallVector<BinaryOperator *> Worklist; 27125f757f3fSDimitry Andric if (BinaryOperator *VariantBinOp = dyn_cast<BinaryOperator>(VariantOp)) 27135f757f3fSDimitry Andric Worklist.push_back(VariantBinOp); 27145f757f3fSDimitry Andric while (!Worklist.empty()) { 27155f757f3fSDimitry Andric BinaryOperator *BO = Worklist.pop_back_val(); 27160fca6ea1SDimitry Andric if (!BO->hasOneUse()) 27175f757f3fSDimitry Andric return false; 27180fca6ea1SDimitry Andric if (isReassociableOp(BO, Instruction::Add, Instruction::FAdd) && 27190fca6ea1SDimitry Andric isa<BinaryOperator>(BO->getOperand(0)) && 27200fca6ea1SDimitry Andric isa<BinaryOperator>(BO->getOperand(1))) { 27210fca6ea1SDimitry Andric Worklist.push_back(cast<BinaryOperator>(BO->getOperand(0))); 27220fca6ea1SDimitry Andric Worklist.push_back(cast<BinaryOperator>(BO->getOperand(1))); 27230fca6ea1SDimitry Andric 
Adds.push_back(BO); 27245f757f3fSDimitry Andric continue; 27255f757f3fSDimitry Andric } 27260fca6ea1SDimitry Andric if (!isReassociableOp(BO, Instruction::Mul, Instruction::FMul) || 27270fca6ea1SDimitry Andric L.isLoopInvariant(BO)) 27285f757f3fSDimitry Andric return false; 27295f757f3fSDimitry Andric Use &U0 = BO->getOperandUse(0); 27305f757f3fSDimitry Andric Use &U1 = BO->getOperandUse(1); 27315f757f3fSDimitry Andric if (L.isLoopInvariant(U0)) 27325f757f3fSDimitry Andric Changes.push_back(&U0); 27335f757f3fSDimitry Andric else if (L.isLoopInvariant(U1)) 27345f757f3fSDimitry Andric Changes.push_back(&U1); 27355f757f3fSDimitry Andric else 27365f757f3fSDimitry Andric return false; 27370fca6ea1SDimitry Andric unsigned Limit = I.getType()->isIntOrIntVectorTy() 27380fca6ea1SDimitry Andric ? IntAssociationUpperLimit 27390fca6ea1SDimitry Andric : FPAssociationUpperLimit; 27400fca6ea1SDimitry Andric if (Changes.size() > Limit) 27415f757f3fSDimitry Andric return false; 27425f757f3fSDimitry Andric } 27435f757f3fSDimitry Andric if (Changes.empty()) 27445f757f3fSDimitry Andric return false; 27455f757f3fSDimitry Andric 27460fca6ea1SDimitry Andric // Drop the poison flags for any adds we looked through. 27470fca6ea1SDimitry Andric if (I.getType()->isIntOrIntVectorTy()) { 27480fca6ea1SDimitry Andric for (auto *Add : Adds) 27490fca6ea1SDimitry Andric Add->dropPoisonGeneratingFlags(); 27500fca6ea1SDimitry Andric } 27510fca6ea1SDimitry Andric 27525f757f3fSDimitry Andric // We know we should do it so let's do the transformation. 
27535f757f3fSDimitry Andric auto *Preheader = L.getLoopPreheader(); 27545f757f3fSDimitry Andric assert(Preheader && "Loop is not in simplify form?"); 27555f757f3fSDimitry Andric IRBuilder<> Builder(Preheader->getTerminator()); 27565f757f3fSDimitry Andric for (auto *U : Changes) { 27575f757f3fSDimitry Andric assert(L.isLoopInvariant(U->get())); 27580fca6ea1SDimitry Andric auto *Ins = cast<BinaryOperator>(U->getUser()); 27590fca6ea1SDimitry Andric Value *Mul; 27600fca6ea1SDimitry Andric if (I.getType()->isIntOrIntVectorTy()) { 27610fca6ea1SDimitry Andric Mul = Builder.CreateMul(U->get(), Factor, "factor.op.mul"); 27620fca6ea1SDimitry Andric // Drop the poison flags on the original multiply. 27630fca6ea1SDimitry Andric Ins->dropPoisonGeneratingFlags(); 27640fca6ea1SDimitry Andric } else 27650fca6ea1SDimitry Andric Mul = Builder.CreateFMulFMF(U->get(), Factor, Ins, "factor.op.fmul"); 27660fca6ea1SDimitry Andric 27670fca6ea1SDimitry Andric // Rewrite the reassociable instruction. 27680fca6ea1SDimitry Andric unsigned OpIdx = U->getOperandNo(); 27690fca6ea1SDimitry Andric auto *LHS = OpIdx == 0 ? Mul : Ins->getOperand(0); 27700fca6ea1SDimitry Andric auto *RHS = OpIdx == 1 ? 
Mul : Ins->getOperand(1); 27710fca6ea1SDimitry Andric auto *NewBO = BinaryOperator::Create(Ins->getOpcode(), LHS, RHS, 27720fca6ea1SDimitry Andric Ins->getName() + ".reass", Ins); 27730fca6ea1SDimitry Andric NewBO->copyIRFlags(Ins); 27740fca6ea1SDimitry Andric if (VariantOp == Ins) 27750fca6ea1SDimitry Andric VariantOp = NewBO; 27760fca6ea1SDimitry Andric Ins->replaceAllUsesWith(NewBO); 27770fca6ea1SDimitry Andric eraseInstruction(*Ins, SafetyInfo, MSSAU); 27785f757f3fSDimitry Andric } 27790fca6ea1SDimitry Andric 27805f757f3fSDimitry Andric I.replaceAllUsesWith(VariantOp); 27815f757f3fSDimitry Andric eraseInstruction(I, SafetyInfo, MSSAU); 27825f757f3fSDimitry Andric return true; 27835f757f3fSDimitry Andric } 27845f757f3fSDimitry Andric 278506c3fb27SDimitry Andric static bool hoistArithmetics(Instruction &I, Loop &L, 278606c3fb27SDimitry Andric ICFLoopSafetyInfo &SafetyInfo, 278706c3fb27SDimitry Andric MemorySSAUpdater &MSSAU, AssumptionCache *AC, 278806c3fb27SDimitry Andric DominatorTree *DT) { 278906c3fb27SDimitry Andric // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them 279006c3fb27SDimitry Andric // into (x < min(INV1, INV2)), and hoisting the invariant part of this 279106c3fb27SDimitry Andric // expression out of the loop. 279206c3fb27SDimitry Andric if (hoistMinMax(I, L, SafetyInfo, MSSAU)) { 279306c3fb27SDimitry Andric ++NumHoisted; 279406c3fb27SDimitry Andric ++NumMinMaxHoisted; 279506c3fb27SDimitry Andric return true; 279606c3fb27SDimitry Andric } 279706c3fb27SDimitry Andric 279806c3fb27SDimitry Andric // Try to hoist GEPs by reassociation. 279906c3fb27SDimitry Andric if (hoistGEP(I, L, SafetyInfo, MSSAU, AC, DT)) { 280006c3fb27SDimitry Andric ++NumHoisted; 280106c3fb27SDimitry Andric ++NumGEPsHoisted; 280206c3fb27SDimitry Andric return true; 280306c3fb27SDimitry Andric } 280406c3fb27SDimitry Andric 280506c3fb27SDimitry Andric // Try to hoist add/sub's by reassociation. 
280606c3fb27SDimitry Andric if (hoistAddSub(I, L, SafetyInfo, MSSAU, AC, DT)) { 280706c3fb27SDimitry Andric ++NumHoisted; 280806c3fb27SDimitry Andric ++NumAddSubHoisted; 280906c3fb27SDimitry Andric return true; 281006c3fb27SDimitry Andric } 281106c3fb27SDimitry Andric 28120fca6ea1SDimitry Andric bool IsInt = I.getType()->isIntOrIntVectorTy(); 28130fca6ea1SDimitry Andric if (hoistMulAddAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) { 28145f757f3fSDimitry Andric ++NumHoisted; 28150fca6ea1SDimitry Andric if (IsInt) 28160fca6ea1SDimitry Andric ++NumIntAssociationsHoisted; 28170fca6ea1SDimitry Andric else 28185f757f3fSDimitry Andric ++NumFPAssociationsHoisted; 28195f757f3fSDimitry Andric return true; 28205f757f3fSDimitry Andric } 28215f757f3fSDimitry Andric 282206c3fb27SDimitry Andric return false; 282306c3fb27SDimitry Andric } 282406c3fb27SDimitry Andric 28250b57cec5SDimitry Andric /// Little predicate that returns true if the specified basic block is in 28260b57cec5SDimitry Andric /// a subloop of the current one, not the current one itself. 28270b57cec5SDimitry Andric /// 28280b57cec5SDimitry Andric static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) { 28290b57cec5SDimitry Andric assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); 28300b57cec5SDimitry Andric return LI->getLoopFor(BB) != CurLoop; 28310b57cec5SDimitry Andric } 2832