10b57cec5SDimitry Andric //===-- LoopUtils.cpp - Loop Utility functions -------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file defines common loop utility functions. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h" 145ffd83dbSDimitry Andric #include "llvm/ADT/DenseSet.h" 155ffd83dbSDimitry Andric #include "llvm/ADT/PriorityWorklist.h" 160b57cec5SDimitry Andric #include "llvm/ADT/ScopeExit.h" 175ffd83dbSDimitry Andric #include "llvm/ADT/SetVector.h" 185ffd83dbSDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 195ffd83dbSDimitry Andric #include "llvm/ADT/SmallVector.h" 200b57cec5SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h" 210b57cec5SDimitry Andric #include "llvm/Analysis/BasicAliasAnalysis.h" 220b57cec5SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h" 230b57cec5SDimitry Andric #include "llvm/Analysis/GlobalsModRef.h" 2404eeddc0SDimitry Andric #include "llvm/Analysis/InstSimplifyFolder.h" 255ffd83dbSDimitry Andric #include "llvm/Analysis/LoopAccessAnalysis.h" 260b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 270b57cec5SDimitry Andric #include "llvm/Analysis/LoopPass.h" 288bcb0991SDimitry Andric #include "llvm/Analysis/MemorySSA.h" 290b57cec5SDimitry Andric #include "llvm/Analysis/MemorySSAUpdater.h" 300b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 310b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 320b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h" 330b57cec5SDimitry Andric #include "llvm/IR/DIBuilder.h" 340b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 350b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 360b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 375ffd83dbSDimitry Andric #include "llvm/IR/MDBuilder.h" 380b57cec5SDimitry Andric #include "llvm/IR/Module.h" 390b57cec5SDimitry Andric #include "llvm/IR/PatternMatch.h" 40bdd1243dSDimitry Andric #include "llvm/IR/ProfDataUtils.h" 410b57cec5SDimitry Andric #include "llvm/IR/ValueHandle.h" 42480093f4SDimitry Andric #include "llvm/InitializePasses.h" 430b57cec5SDimitry Andric #include "llvm/Pass.h" 440b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 450b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 465ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/Local.h" 475ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 480b57cec5SDimitry Andric 490b57cec5SDimitry Andric using namespace llvm; 500b57cec5SDimitry Andric using namespace llvm::PatternMatch; 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric #define DEBUG_TYPE "loop-utils" 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced"; 558bcb0991SDimitry Andric static const char *LLVMLoopDisableLICM = "llvm.licm.disable"; 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, 580b57cec5SDimitry Andric MemorySSAUpdater *MSSAU, 590b57cec5SDimitry Andric bool PreserveLCSSA) { 600b57cec5SDimitry Andric bool Changed = false; 610b57cec5SDimitry Andric 620b57cec5SDimitry Andric // We re-use a vector for the in-loop predecesosrs. 630b57cec5SDimitry Andric SmallVector<BasicBlock *, 4> InLoopPredecessors; 640b57cec5SDimitry Andric 650b57cec5SDimitry Andric auto RewriteExit = [&](BasicBlock *BB) { 660b57cec5SDimitry Andric assert(InLoopPredecessors.empty() && 670b57cec5SDimitry Andric "Must start with an empty predecessors list!"); 680b57cec5SDimitry Andric auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); }); 690b57cec5SDimitry Andric 700b57cec5SDimitry Andric // See if there are any non-loop predecessors of this exit block and 710b57cec5SDimitry Andric // keep track of the in-loop predecessors. 720b57cec5SDimitry Andric bool IsDedicatedExit = true; 730b57cec5SDimitry Andric for (auto *PredBB : predecessors(BB)) 740b57cec5SDimitry Andric if (L->contains(PredBB)) { 750b57cec5SDimitry Andric if (isa<IndirectBrInst>(PredBB->getTerminator())) 760b57cec5SDimitry Andric // We cannot rewrite exiting edges from an indirectbr. 770b57cec5SDimitry Andric return false; 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric InLoopPredecessors.push_back(PredBB); 800b57cec5SDimitry Andric } else { 810b57cec5SDimitry Andric IsDedicatedExit = false; 820b57cec5SDimitry Andric } 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!"); 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric // Nothing to do if this is already a dedicated exit. 870b57cec5SDimitry Andric if (IsDedicatedExit) 880b57cec5SDimitry Andric return false; 890b57cec5SDimitry Andric 900b57cec5SDimitry Andric auto *NewExitBB = SplitBlockPredecessors( 910b57cec5SDimitry Andric BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA); 920b57cec5SDimitry Andric 930b57cec5SDimitry Andric if (!NewExitBB) 940b57cec5SDimitry Andric LLVM_DEBUG( 950b57cec5SDimitry Andric dbgs() << "WARNING: Can't create a dedicated exit block for loop: " 960b57cec5SDimitry Andric << *L << "\n"); 970b57cec5SDimitry Andric else 980b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " 990b57cec5SDimitry Andric << NewExitBB->getName() << "\n"); 1000b57cec5SDimitry Andric return true; 1010b57cec5SDimitry Andric }; 1020b57cec5SDimitry Andric 1030b57cec5SDimitry Andric // Walk the exit blocks directly rather than building up a data structure for 1040b57cec5SDimitry Andric // them, but only visit each one once. 1050b57cec5SDimitry Andric SmallPtrSet<BasicBlock *, 4> Visited; 1060b57cec5SDimitry Andric for (auto *BB : L->blocks()) 1070b57cec5SDimitry Andric for (auto *SuccBB : successors(BB)) { 1080b57cec5SDimitry Andric // We're looking for exit blocks so skip in-loop successors. 1090b57cec5SDimitry Andric if (L->contains(SuccBB)) 1100b57cec5SDimitry Andric continue; 1110b57cec5SDimitry Andric 1120b57cec5SDimitry Andric // Visit each exit block exactly once. 1130b57cec5SDimitry Andric if (!Visited.insert(SuccBB).second) 1140b57cec5SDimitry Andric continue; 1150b57cec5SDimitry Andric 1160b57cec5SDimitry Andric Changed |= RewriteExit(SuccBB); 1170b57cec5SDimitry Andric } 1180b57cec5SDimitry Andric 1190b57cec5SDimitry Andric return Changed; 1200b57cec5SDimitry Andric } 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric /// Returns the instructions that use values defined in the loop. 1230b57cec5SDimitry Andric SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { 1240b57cec5SDimitry Andric SmallVector<Instruction *, 8> UsedOutside; 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric for (auto *Block : L->getBlocks()) 1270b57cec5SDimitry Andric // FIXME: I believe that this could use copy_if if the Inst reference could 1280b57cec5SDimitry Andric // be adapted into a pointer. 1290b57cec5SDimitry Andric for (auto &Inst : *Block) { 1300b57cec5SDimitry Andric auto Users = Inst.users(); 1310b57cec5SDimitry Andric if (any_of(Users, [&](User *U) { 1320b57cec5SDimitry Andric auto *Use = cast<Instruction>(U); 1330b57cec5SDimitry Andric return !L->contains(Use->getParent()); 1340b57cec5SDimitry Andric })) 1350b57cec5SDimitry Andric UsedOutside.push_back(&Inst); 1360b57cec5SDimitry Andric } 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric return UsedOutside; 1390b57cec5SDimitry Andric } 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { 1420b57cec5SDimitry Andric // By definition, all loop passes need the LoopInfo analysis and the 1430b57cec5SDimitry Andric // Dominator tree it depends on. Because they all participate in the loop 1440b57cec5SDimitry Andric // pass manager, they must also preserve these. 1450b57cec5SDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 1460b57cec5SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>(); 1470b57cec5SDimitry Andric AU.addRequired<LoopInfoWrapperPass>(); 1480b57cec5SDimitry Andric AU.addPreserved<LoopInfoWrapperPass>(); 1490b57cec5SDimitry Andric 1500b57cec5SDimitry Andric // We must also preserve LoopSimplify and LCSSA. We locally access their IDs 1510b57cec5SDimitry Andric // here because users shouldn't directly get them from this header. 1520b57cec5SDimitry Andric extern char &LoopSimplifyID; 1530b57cec5SDimitry Andric extern char &LCSSAID; 1540b57cec5SDimitry Andric AU.addRequiredID(LoopSimplifyID); 1550b57cec5SDimitry Andric AU.addPreservedID(LoopSimplifyID); 1560b57cec5SDimitry Andric AU.addRequiredID(LCSSAID); 1570b57cec5SDimitry Andric AU.addPreservedID(LCSSAID); 1580b57cec5SDimitry Andric // This is used in the LPPassManager to perform LCSSA verification on passes 1590b57cec5SDimitry Andric // which preserve lcssa form 1600b57cec5SDimitry Andric AU.addRequired<LCSSAVerificationPass>(); 1610b57cec5SDimitry Andric AU.addPreserved<LCSSAVerificationPass>(); 1620b57cec5SDimitry Andric 1630b57cec5SDimitry Andric // Loop passes are designed to run inside of a loop pass manager which means 1640b57cec5SDimitry Andric // that any function analyses they require must be required by the first loop 1650b57cec5SDimitry Andric // pass in the manager (so that it is computed before the loop pass manager 1660b57cec5SDimitry Andric // runs) and preserved by all loop pasess in the manager. To make this 1670b57cec5SDimitry Andric // reasonably robust, the set needed for most loop passes is maintained here. 1680b57cec5SDimitry Andric // If your loop pass requires an analysis not listed here, you will need to 1690b57cec5SDimitry Andric // carefully audit the loop pass manager nesting structure that results. 1700b57cec5SDimitry Andric AU.addRequired<AAResultsWrapperPass>(); 1710b57cec5SDimitry Andric AU.addPreserved<AAResultsWrapperPass>(); 1720b57cec5SDimitry Andric AU.addPreserved<BasicAAWrapperPass>(); 1730b57cec5SDimitry Andric AU.addPreserved<GlobalsAAWrapperPass>(); 1740b57cec5SDimitry Andric AU.addPreserved<SCEVAAWrapperPass>(); 1750b57cec5SDimitry Andric AU.addRequired<ScalarEvolutionWrapperPass>(); 1760b57cec5SDimitry Andric AU.addPreserved<ScalarEvolutionWrapperPass>(); 1778bcb0991SDimitry Andric // FIXME: When all loop passes preserve MemorySSA, it can be required and 1788bcb0991SDimitry Andric // preserved here instead of the individual handling in each pass. 1790b57cec5SDimitry Andric } 1800b57cec5SDimitry Andric 1810b57cec5SDimitry Andric /// Manually defined generic "LoopPass" dependency initialization. This is used 1820b57cec5SDimitry Andric /// to initialize the exact set of passes from above in \c 1830b57cec5SDimitry Andric /// getLoopAnalysisUsage. It can be used within a loop pass's initialization 1840b57cec5SDimitry Andric /// with: 1850b57cec5SDimitry Andric /// 1860b57cec5SDimitry Andric /// INITIALIZE_PASS_DEPENDENCY(LoopPass) 1870b57cec5SDimitry Andric /// 1880b57cec5SDimitry Andric /// As-if "LoopPass" were a pass. 1890b57cec5SDimitry Andric void llvm::initializeLoopPassPass(PassRegistry &Registry) { 1900b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 1910b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1920b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 1930b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) 1940b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 1950b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) 1960b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 1970b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) 1980b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 1998bcb0991SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) 2008bcb0991SDimitry Andric } 2018bcb0991SDimitry Andric 2028bcb0991SDimitry Andric /// Create MDNode for input string. 2038bcb0991SDimitry Andric static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) { 2048bcb0991SDimitry Andric LLVMContext &Context = TheLoop->getHeader()->getContext(); 2058bcb0991SDimitry Andric Metadata *MDs[] = { 2068bcb0991SDimitry Andric MDString::get(Context, Name), 2078bcb0991SDimitry Andric ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))}; 2088bcb0991SDimitry Andric return MDNode::get(Context, MDs); 2098bcb0991SDimitry Andric } 2108bcb0991SDimitry Andric 2118bcb0991SDimitry Andric /// Set input string into loop metadata by keeping other values intact. 2128bcb0991SDimitry Andric /// If the string is already in loop metadata update value if it is 2138bcb0991SDimitry Andric /// different. 2148bcb0991SDimitry Andric void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD, 2158bcb0991SDimitry Andric unsigned V) { 2168bcb0991SDimitry Andric SmallVector<Metadata *, 4> MDs(1); 2178bcb0991SDimitry Andric // If the loop already has metadata, retain it. 2188bcb0991SDimitry Andric MDNode *LoopID = TheLoop->getLoopID(); 2198bcb0991SDimitry Andric if (LoopID) { 2208bcb0991SDimitry Andric for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { 2218bcb0991SDimitry Andric MDNode *Node = cast<MDNode>(LoopID->getOperand(i)); 2228bcb0991SDimitry Andric // If it is of form key = value, try to parse it. 2238bcb0991SDimitry Andric if (Node->getNumOperands() == 2) { 2248bcb0991SDimitry Andric MDString *S = dyn_cast<MDString>(Node->getOperand(0)); 2250fca6ea1SDimitry Andric if (S && S->getString() == StringMD) { 2268bcb0991SDimitry Andric ConstantInt *IntMD = 2278bcb0991SDimitry Andric mdconst::extract_or_null<ConstantInt>(Node->getOperand(1)); 2288bcb0991SDimitry Andric if (IntMD && IntMD->getSExtValue() == V) 2298bcb0991SDimitry Andric // It is already in place. Do nothing. 2308bcb0991SDimitry Andric return; 2318bcb0991SDimitry Andric // We need to update the value, so just skip it here and it will 2328bcb0991SDimitry Andric // be added after copying other existed nodes. 2338bcb0991SDimitry Andric continue; 2348bcb0991SDimitry Andric } 2358bcb0991SDimitry Andric } 2368bcb0991SDimitry Andric MDs.push_back(Node); 2378bcb0991SDimitry Andric } 2388bcb0991SDimitry Andric } 2398bcb0991SDimitry Andric // Add new metadata. 2408bcb0991SDimitry Andric MDs.push_back(createStringMetadata(TheLoop, StringMD, V)); 2418bcb0991SDimitry Andric // Replace current metadata node with new one. 2428bcb0991SDimitry Andric LLVMContext &Context = TheLoop->getHeader()->getContext(); 2438bcb0991SDimitry Andric MDNode *NewLoopID = MDNode::get(Context, MDs); 2448bcb0991SDimitry Andric // Set operand 0 to refer to the loop id itself. 2458bcb0991SDimitry Andric NewLoopID->replaceOperandWith(0, NewLoopID); 2468bcb0991SDimitry Andric TheLoop->setLoopID(NewLoopID); 2470b57cec5SDimitry Andric } 2480b57cec5SDimitry Andric 249bdd1243dSDimitry Andric std::optional<ElementCount> 250fe6060f1SDimitry Andric llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) { 251bdd1243dSDimitry Andric std::optional<int> Width = 252e8d8bef9SDimitry Andric getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width"); 253e8d8bef9SDimitry Andric 25481ad6265SDimitry Andric if (Width) { 255bdd1243dSDimitry Andric std::optional<int> IsScalable = getOptionalIntLoopAttribute( 256e8d8bef9SDimitry Andric TheLoop, "llvm.loop.vectorize.scalable.enable"); 25781ad6265SDimitry Andric return ElementCount::get(*Width, IsScalable.value_or(false)); 258e8d8bef9SDimitry Andric } 259e8d8bef9SDimitry Andric 260bdd1243dSDimitry Andric return std::nullopt; 261e8d8bef9SDimitry Andric } 262e8d8bef9SDimitry Andric 263bdd1243dSDimitry Andric std::optional<MDNode *> llvm::makeFollowupLoopID( 2640b57cec5SDimitry Andric MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions, 2650b57cec5SDimitry Andric const char *InheritOptionsExceptPrefix, bool AlwaysNew) { 2660b57cec5SDimitry Andric if (!OrigLoopID) { 2670b57cec5SDimitry Andric if (AlwaysNew) 2680b57cec5SDimitry Andric return nullptr; 269bdd1243dSDimitry Andric return std::nullopt; 2700b57cec5SDimitry Andric } 2710b57cec5SDimitry Andric 2720b57cec5SDimitry Andric assert(OrigLoopID->getOperand(0) == OrigLoopID); 2730b57cec5SDimitry Andric 2740b57cec5SDimitry Andric bool InheritAllAttrs = !InheritOptionsExceptPrefix; 2750b57cec5SDimitry Andric bool InheritSomeAttrs = 2760b57cec5SDimitry Andric InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0'; 2770b57cec5SDimitry Andric SmallVector<Metadata *, 8> MDs; 2780b57cec5SDimitry Andric MDs.push_back(nullptr); 2790b57cec5SDimitry Andric 2800b57cec5SDimitry Andric bool Changed = false; 2810b57cec5SDimitry Andric if (InheritAllAttrs || InheritSomeAttrs) { 282e8d8bef9SDimitry Andric for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) { 2830b57cec5SDimitry Andric MDNode *Op = cast<MDNode>(Existing.get()); 2840b57cec5SDimitry Andric 2850b57cec5SDimitry Andric auto InheritThisAttribute = [InheritSomeAttrs, 2860b57cec5SDimitry Andric InheritOptionsExceptPrefix](MDNode *Op) { 2870b57cec5SDimitry Andric if (!InheritSomeAttrs) 2880b57cec5SDimitry Andric return false; 2890b57cec5SDimitry Andric 2900b57cec5SDimitry Andric // Skip malformatted attribute metadata nodes. 2910b57cec5SDimitry Andric if (Op->getNumOperands() == 0) 2920b57cec5SDimitry Andric return true; 2930b57cec5SDimitry Andric Metadata *NameMD = Op->getOperand(0).get(); 2940b57cec5SDimitry Andric if (!isa<MDString>(NameMD)) 2950b57cec5SDimitry Andric return true; 2960b57cec5SDimitry Andric StringRef AttrName = cast<MDString>(NameMD)->getString(); 2970b57cec5SDimitry Andric 2980b57cec5SDimitry Andric // Do not inherit excluded attributes. 2995f757f3fSDimitry Andric return !AttrName.starts_with(InheritOptionsExceptPrefix); 3000b57cec5SDimitry Andric }; 3010b57cec5SDimitry Andric 3020b57cec5SDimitry Andric if (InheritThisAttribute(Op)) 3030b57cec5SDimitry Andric MDs.push_back(Op); 3040b57cec5SDimitry Andric else 3050b57cec5SDimitry Andric Changed = true; 3060b57cec5SDimitry Andric } 3070b57cec5SDimitry Andric } else { 3080b57cec5SDimitry Andric // Modified if we dropped at least one attribute. 3090b57cec5SDimitry Andric Changed = OrigLoopID->getNumOperands() > 1; 3100b57cec5SDimitry Andric } 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric bool HasAnyFollowup = false; 3130b57cec5SDimitry Andric for (StringRef OptionName : FollowupOptions) { 3140b57cec5SDimitry Andric MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName); 3150b57cec5SDimitry Andric if (!FollowupNode) 3160b57cec5SDimitry Andric continue; 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric HasAnyFollowup = true; 319e8d8bef9SDimitry Andric for (const MDOperand &Option : drop_begin(FollowupNode->operands())) { 3200b57cec5SDimitry Andric MDs.push_back(Option.get()); 3210b57cec5SDimitry Andric Changed = true; 3220b57cec5SDimitry Andric } 3230b57cec5SDimitry Andric } 3240b57cec5SDimitry Andric 3250b57cec5SDimitry Andric // Attributes of the followup loop not specified explicity, so signal to the 3260b57cec5SDimitry Andric // transformation pass to add suitable attributes. 3270b57cec5SDimitry Andric if (!AlwaysNew && !HasAnyFollowup) 328bdd1243dSDimitry Andric return std::nullopt; 3290b57cec5SDimitry Andric 3300b57cec5SDimitry Andric // If no attributes were added or remove, the previous loop Id can be reused. 3310b57cec5SDimitry Andric if (!AlwaysNew && !Changed) 3320b57cec5SDimitry Andric return OrigLoopID; 3330b57cec5SDimitry Andric 3340b57cec5SDimitry Andric // No attributes is equivalent to having no !llvm.loop metadata at all. 3350b57cec5SDimitry Andric if (MDs.size() == 1) 3360b57cec5SDimitry Andric return nullptr; 3370b57cec5SDimitry Andric 3380b57cec5SDimitry Andric // Build the new loop ID. 3390b57cec5SDimitry Andric MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs); 3400b57cec5SDimitry Andric FollowupLoopID->replaceOperandWith(0, FollowupLoopID); 3410b57cec5SDimitry Andric return FollowupLoopID; 3420b57cec5SDimitry Andric } 3430b57cec5SDimitry Andric 3440b57cec5SDimitry Andric bool llvm::hasDisableAllTransformsHint(const Loop *L) { 3450b57cec5SDimitry Andric return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced); 3460b57cec5SDimitry Andric } 3470b57cec5SDimitry Andric 3488bcb0991SDimitry Andric bool llvm::hasDisableLICMTransformsHint(const Loop *L) { 3498bcb0991SDimitry Andric return getBooleanLoopAttribute(L, LLVMLoopDisableLICM); 3508bcb0991SDimitry Andric } 3518bcb0991SDimitry Andric 352fe6060f1SDimitry Andric TransformationMode llvm::hasUnrollTransformation(const Loop *L) { 3530b57cec5SDimitry Andric if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable")) 3540b57cec5SDimitry Andric return TM_SuppressedByUser; 3550b57cec5SDimitry Andric 356bdd1243dSDimitry Andric std::optional<int> Count = 3570b57cec5SDimitry Andric getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count"); 35881ad6265SDimitry Andric if (Count) 359bdd1243dSDimitry Andric return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser; 3600b57cec5SDimitry Andric 3610b57cec5SDimitry Andric if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")) 3620b57cec5SDimitry Andric return TM_ForcedByUser; 3630b57cec5SDimitry Andric 3640b57cec5SDimitry Andric if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full")) 3650b57cec5SDimitry Andric return TM_ForcedByUser; 3660b57cec5SDimitry Andric 3670b57cec5SDimitry Andric if (hasDisableAllTransformsHint(L)) 3680b57cec5SDimitry Andric return TM_Disable; 3690b57cec5SDimitry Andric 3700b57cec5SDimitry Andric return TM_Unspecified; 3710b57cec5SDimitry Andric } 3720b57cec5SDimitry Andric 373fe6060f1SDimitry Andric TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) { 3740b57cec5SDimitry Andric if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable")) 3750b57cec5SDimitry Andric return TM_SuppressedByUser; 3760b57cec5SDimitry Andric 377bdd1243dSDimitry Andric std::optional<int> Count = 3780b57cec5SDimitry Andric getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count"); 37981ad6265SDimitry Andric if (Count) 380bdd1243dSDimitry Andric return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser; 3810b57cec5SDimitry Andric 3820b57cec5SDimitry Andric if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable")) 3830b57cec5SDimitry Andric return TM_ForcedByUser; 3840b57cec5SDimitry Andric 3850b57cec5SDimitry Andric if (hasDisableAllTransformsHint(L)) 3860b57cec5SDimitry Andric return TM_Disable; 3870b57cec5SDimitry Andric 3880b57cec5SDimitry Andric return TM_Unspecified; 3890b57cec5SDimitry Andric } 3900b57cec5SDimitry Andric 391fe6060f1SDimitry Andric TransformationMode llvm::hasVectorizeTransformation(const Loop *L) { 392bdd1243dSDimitry Andric std::optional<bool> Enable = 3930b57cec5SDimitry Andric getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable"); 3940b57cec5SDimitry Andric 3950b57cec5SDimitry Andric if (Enable == false) 3960b57cec5SDimitry Andric return TM_SuppressedByUser; 3970b57cec5SDimitry Andric 398bdd1243dSDimitry Andric std::optional<ElementCount> VectorizeWidth = 399e8d8bef9SDimitry Andric getOptionalElementCountLoopAttribute(L); 400bdd1243dSDimitry Andric std::optional<int> InterleaveCount = 4010b57cec5SDimitry Andric getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count"); 4020b57cec5SDimitry Andric 4030b57cec5SDimitry Andric // 'Forcing' vector width and interleave count to one effectively disables 4040b57cec5SDimitry Andric // this tranformation. 405e8d8bef9SDimitry Andric if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() && 406e8d8bef9SDimitry Andric InterleaveCount == 1) 4070b57cec5SDimitry Andric return TM_SuppressedByUser; 4080b57cec5SDimitry Andric 4090b57cec5SDimitry Andric if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized")) 4100b57cec5SDimitry Andric return TM_Disable; 4110b57cec5SDimitry Andric 4120b57cec5SDimitry Andric if (Enable == true) 4130b57cec5SDimitry Andric return TM_ForcedByUser; 4140b57cec5SDimitry Andric 415e8d8bef9SDimitry Andric if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1) 4160b57cec5SDimitry Andric return TM_Disable; 4170b57cec5SDimitry Andric 418e8d8bef9SDimitry Andric if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1) 4190b57cec5SDimitry Andric return TM_Enable; 4200b57cec5SDimitry Andric 4210b57cec5SDimitry Andric if (hasDisableAllTransformsHint(L)) 4220b57cec5SDimitry Andric return TM_Disable; 4230b57cec5SDimitry Andric 4240b57cec5SDimitry Andric return TM_Unspecified; 4250b57cec5SDimitry Andric } 4260b57cec5SDimitry Andric 427fe6060f1SDimitry Andric TransformationMode llvm::hasDistributeTransformation(const Loop *L) { 4280b57cec5SDimitry Andric if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable")) 4290b57cec5SDimitry Andric return TM_ForcedByUser; 4300b57cec5SDimitry Andric 4310b57cec5SDimitry Andric if (hasDisableAllTransformsHint(L)) 4320b57cec5SDimitry Andric return TM_Disable; 4330b57cec5SDimitry Andric 4340b57cec5SDimitry Andric return TM_Unspecified; 4350b57cec5SDimitry Andric } 4360b57cec5SDimitry Andric 437fe6060f1SDimitry Andric TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) { 4380b57cec5SDimitry Andric if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable")) 4390b57cec5SDimitry Andric return TM_SuppressedByUser; 4400b57cec5SDimitry Andric 4410b57cec5SDimitry Andric if (hasDisableAllTransformsHint(L)) 4420b57cec5SDimitry Andric return TM_Disable; 4430b57cec5SDimitry Andric 4440b57cec5SDimitry Andric return TM_Unspecified; 4450b57cec5SDimitry Andric } 4460b57cec5SDimitry Andric 4470b57cec5SDimitry Andric /// Does a BFS from a given node to all of its children inside a given loop. 4480b57cec5SDimitry Andric /// The returned vector of nodes includes the starting point. 4490b57cec5SDimitry Andric SmallVector<DomTreeNode *, 16> 4500b57cec5SDimitry Andric llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) { 4510b57cec5SDimitry Andric SmallVector<DomTreeNode *, 16> Worklist; 4520b57cec5SDimitry Andric auto AddRegionToWorklist = [&](DomTreeNode *DTN) { 4530b57cec5SDimitry Andric // Only include subregions in the top level loop. 4540b57cec5SDimitry Andric BasicBlock *BB = DTN->getBlock(); 4550b57cec5SDimitry Andric if (CurLoop->contains(BB)) 4560b57cec5SDimitry Andric Worklist.push_back(DTN); 4570b57cec5SDimitry Andric }; 4580b57cec5SDimitry Andric 4590b57cec5SDimitry Andric AddRegionToWorklist(N); 4600b57cec5SDimitry Andric 4615ffd83dbSDimitry Andric for (size_t I = 0; I < Worklist.size(); I++) { 4625ffd83dbSDimitry Andric for (DomTreeNode *Child : Worklist[I]->children()) 4630b57cec5SDimitry Andric AddRegionToWorklist(Child); 4645ffd83dbSDimitry Andric } 4650b57cec5SDimitry Andric 4660b57cec5SDimitry Andric return Worklist; 4670b57cec5SDimitry Andric } 4680b57cec5SDimitry Andric 46906c3fb27SDimitry Andric bool llvm::isAlmostDeadIV(PHINode *PN, BasicBlock *LatchBlock, Value *Cond) { 47006c3fb27SDimitry Andric int LatchIdx = PN->getBasicBlockIndex(LatchBlock); 4710fca6ea1SDimitry Andric assert(LatchIdx != -1 && "LatchBlock is not a case in this PHINode"); 47206c3fb27SDimitry Andric Value *IncV = PN->getIncomingValue(LatchIdx); 47306c3fb27SDimitry Andric 47406c3fb27SDimitry Andric for (User *U : PN->users()) 47506c3fb27SDimitry Andric if (U != Cond && U != IncV) return false; 47606c3fb27SDimitry Andric 47706c3fb27SDimitry Andric for (User *U : IncV->users()) 47806c3fb27SDimitry Andric if (U != Cond && U != PN) return false; 47906c3fb27SDimitry Andric return true; 48006c3fb27SDimitry Andric } 48106c3fb27SDimitry Andric 48206c3fb27SDimitry Andric 4835ffd83dbSDimitry Andric void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, 4845ffd83dbSDimitry Andric LoopInfo *LI, MemorySSA *MSSA) { 4850b57cec5SDimitry Andric assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!"); 4860b57cec5SDimitry Andric auto *Preheader = L->getLoopPreheader(); 4870b57cec5SDimitry Andric assert(Preheader && "Preheader should exist!"); 4880b57cec5SDimitry Andric 4895ffd83dbSDimitry Andric std::unique_ptr<MemorySSAUpdater> MSSAU; 4905ffd83dbSDimitry Andric if (MSSA) 4915ffd83dbSDimitry Andric MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); 4925ffd83dbSDimitry Andric 4930b57cec5SDimitry Andric // Now that we know the removal is safe, remove the loop by changing the 4940b57cec5SDimitry Andric // branch from the preheader to go to the single exit block. 4950b57cec5SDimitry Andric // 4960b57cec5SDimitry Andric // Because we're deleting a large chunk of code at once, the sequence in which 4970b57cec5SDimitry Andric // we remove things is very important to avoid invalidation issues. 4980b57cec5SDimitry Andric 4990b57cec5SDimitry Andric // Tell ScalarEvolution that the loop is deleted. Do this before 5000b57cec5SDimitry Andric // deleting the loop so that ScalarEvolution can look at the loop 5010b57cec5SDimitry Andric // to determine what it needs to clean up. 502bdd1243dSDimitry Andric if (SE) { 5030b57cec5SDimitry Andric SE->forgetLoop(L); 504bdd1243dSDimitry Andric SE->forgetBlockAndLoopDispositions(); 505bdd1243dSDimitry Andric } 5060b57cec5SDimitry Andric 50781ad6265SDimitry Andric Instruction *OldTerm = Preheader->getTerminator(); 50881ad6265SDimitry Andric assert(!OldTerm->mayHaveSideEffects() && 50981ad6265SDimitry Andric "Preheader must end with a side-effect-free terminator"); 51081ad6265SDimitry Andric assert(OldTerm->getNumSuccessors() == 1 && 51181ad6265SDimitry Andric "Preheader must have a single successor"); 5120b57cec5SDimitry Andric // Connect the preheader to the exit block. Keep the old edge to the header 5130b57cec5SDimitry Andric // around to perform the dominator tree update in two separate steps 5140b57cec5SDimitry Andric // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge 5150b57cec5SDimitry Andric // preheader -> header. 5160b57cec5SDimitry Andric // 5170b57cec5SDimitry Andric // 5180b57cec5SDimitry Andric // 0. Preheader 1. Preheader 2. Preheader 5190b57cec5SDimitry Andric // | | | | 5200b57cec5SDimitry Andric // V | V | 5210b57cec5SDimitry Andric // Header <--\ | Header <--\ | Header <--\ 5220b57cec5SDimitry Andric // | | | | | | | | | | | 5230b57cec5SDimitry Andric // | V | | | V | | | V | 5240b57cec5SDimitry Andric // | Body --/ | | Body --/ | | Body --/ 5250b57cec5SDimitry Andric // V V V V V 5260b57cec5SDimitry Andric // Exit Exit Exit 5270b57cec5SDimitry Andric // 5280b57cec5SDimitry Andric // By doing this is two separate steps we can perform the dominator tree 5290b57cec5SDimitry Andric // update without using the batch update API. 5300b57cec5SDimitry Andric // 5310b57cec5SDimitry Andric // Even when the loop is never executed, we cannot remove the edge from the 5320b57cec5SDimitry Andric // source block to the exit block. Consider the case where the unexecuted loop 5330b57cec5SDimitry Andric // branches back to an outer loop. If we deleted the loop and removed the edge 5340b57cec5SDimitry Andric // coming to this inner loop, this will break the outer loop structure (by 5350b57cec5SDimitry Andric // deleting the backedge of the outer loop). If the outer loop is indeed a 5360b57cec5SDimitry Andric // non-loop, it will be deleted in a future iteration of loop deletion pass. 53781ad6265SDimitry Andric IRBuilder<> Builder(OldTerm); 538e8d8bef9SDimitry Andric 539e8d8bef9SDimitry Andric auto *ExitBlock = L->getUniqueExitBlock(); 540e8d8bef9SDimitry Andric DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 541e8d8bef9SDimitry Andric if (ExitBlock) { 542e8d8bef9SDimitry Andric assert(ExitBlock && "Should have a unique exit block!"); 543e8d8bef9SDimitry Andric assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); 544e8d8bef9SDimitry Andric 5450b57cec5SDimitry Andric Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock); 5460b57cec5SDimitry Andric // Remove the old branch. The conditional branch becomes a new terminator. 54781ad6265SDimitry Andric OldTerm->eraseFromParent(); 5480b57cec5SDimitry Andric 5490b57cec5SDimitry Andric // Rewrite phis in the exit block to get their inputs from the Preheader 5500b57cec5SDimitry Andric // instead of the exiting block. 5510b57cec5SDimitry Andric for (PHINode &P : ExitBlock->phis()) { 5520b57cec5SDimitry Andric // Set the zero'th element of Phi to be from the preheader and remove all 5530b57cec5SDimitry Andric // other incoming values. Given the loop has dedicated exits, all other 5540b57cec5SDimitry Andric // incoming values must be from the exiting blocks. 5550b57cec5SDimitry Andric int PredIndex = 0; 5560b57cec5SDimitry Andric P.setIncomingBlock(PredIndex, Preheader); 5570b57cec5SDimitry Andric // Removes all incoming values from all other exiting blocks (including 5580b57cec5SDimitry Andric // duplicate values from an exiting block). 5590b57cec5SDimitry Andric // Nuke all entries except the zero'th entry which is the preheader entry. 5605f757f3fSDimitry Andric P.removeIncomingValueIf([](unsigned Idx) { return Idx != 0; }, 5615f757f3fSDimitry Andric /* DeletePHIIfEmpty */ false); 5620b57cec5SDimitry Andric 5630b57cec5SDimitry Andric assert((P.getNumIncomingValues() == 1 && 5640b57cec5SDimitry Andric P.getIncomingBlock(PredIndex) == Preheader) && 5650b57cec5SDimitry Andric "Should have exactly one value and that's from the preheader!"); 5660b57cec5SDimitry Andric } 5670b57cec5SDimitry Andric 5685ffd83dbSDimitry Andric if (DT) { 5695ffd83dbSDimitry Andric DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}); 5705ffd83dbSDimitry Andric if (MSSA) { 571e8d8bef9SDimitry Andric MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}, 572e8d8bef9SDimitry Andric *DT); 5735ffd83dbSDimitry Andric if (VerifyMemorySSA) 5745ffd83dbSDimitry Andric MSSA->verifyMemorySSA(); 5755ffd83dbSDimitry Andric } 5765ffd83dbSDimitry Andric } 5775ffd83dbSDimitry Andric 5780b57cec5SDimitry Andric // Disconnect the loop body by branching directly to its exit. 5790b57cec5SDimitry Andric Builder.SetInsertPoint(Preheader->getTerminator()); 5800b57cec5SDimitry Andric Builder.CreateBr(ExitBlock); 5810b57cec5SDimitry Andric // Remove the old branch. 5820b57cec5SDimitry Andric Preheader->getTerminator()->eraseFromParent(); 583e8d8bef9SDimitry Andric } else { 584e8d8bef9SDimitry Andric assert(L->hasNoExitBlocks() && 585e8d8bef9SDimitry Andric "Loop should have either zero or one exit blocks."); 586e8d8bef9SDimitry Andric 58781ad6265SDimitry Andric Builder.SetInsertPoint(OldTerm); 588e8d8bef9SDimitry Andric Builder.CreateUnreachable(); 589e8d8bef9SDimitry Andric Preheader->getTerminator()->eraseFromParent(); 590e8d8bef9SDimitry Andric } 5910b57cec5SDimitry Andric 5920b57cec5SDimitry Andric if (DT) { 5935ffd83dbSDimitry Andric DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}}); 5945ffd83dbSDimitry Andric if (MSSA) { 5955ffd83dbSDimitry Andric MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}}, 5965ffd83dbSDimitry Andric *DT); 5975ffd83dbSDimitry Andric SmallSetVector<BasicBlock *, 8> DeadBlockSet(L->block_begin(), 5985ffd83dbSDimitry Andric L->block_end()); 5995ffd83dbSDimitry Andric MSSAU->removeBlocks(DeadBlockSet); 6005ffd83dbSDimitry Andric if (VerifyMemorySSA) 6015ffd83dbSDimitry Andric MSSA->verifyMemorySSA(); 6025ffd83dbSDimitry Andric } 6030b57cec5SDimitry Andric } 6040b57cec5SDimitry Andric 6050b57cec5SDimitry Andric // Use a map to unique and a vector to guarantee deterministic ordering. 606bdd1243dSDimitry Andric llvm::SmallDenseSet<DebugVariable, 4> DeadDebugSet; 6070b57cec5SDimitry Andric llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst; 6080fca6ea1SDimitry Andric llvm::SmallVector<DbgVariableRecord *, 4> DeadDbgVariableRecords; 6090b57cec5SDimitry Andric 610e8d8bef9SDimitry Andric if (ExitBlock) { 6110b57cec5SDimitry Andric // Given LCSSA form is satisfied, we should not have users of instructions 6120b57cec5SDimitry Andric // within the dead loop outside of the loop. However, LCSSA doesn't take 6130b57cec5SDimitry Andric // unreachable uses into account. We handle them here. 6140b57cec5SDimitry Andric // We could do it after drop all references (in this case all users in the 6150b57cec5SDimitry Andric // loop will be already eliminated and we have less work to do but according 6160b57cec5SDimitry Andric // to API doc of User::dropAllReferences only valid operation after dropping 6170b57cec5SDimitry Andric // references, is deletion. So let's substitute all usages of 618fcaf7f86SDimitry Andric // instruction from the loop with poison value of corresponding type first. 6190b57cec5SDimitry Andric for (auto *Block : L->blocks()) 6200b57cec5SDimitry Andric for (Instruction &I : *Block) { 621fcaf7f86SDimitry Andric auto *Poison = PoisonValue::get(I.getType()); 622349cc55cSDimitry Andric for (Use &U : llvm::make_early_inc_range(I.uses())) { 6230b57cec5SDimitry Andric if (auto *Usr = dyn_cast<Instruction>(U.getUser())) 6240b57cec5SDimitry Andric if (L->contains(Usr->getParent())) 6250b57cec5SDimitry Andric continue; 6260b57cec5SDimitry Andric // If we have a DT then we can check that uses outside a loop only in 6270b57cec5SDimitry Andric // unreachable block. 6280b57cec5SDimitry Andric if (DT) 6290b57cec5SDimitry Andric assert(!DT->isReachableFromEntry(U) && 6300b57cec5SDimitry Andric "Unexpected user in reachable block"); 631fcaf7f86SDimitry Andric U.set(Poison); 6320b57cec5SDimitry Andric } 6335f757f3fSDimitry Andric 6340fca6ea1SDimitry Andric // RemoveDIs: do the same as below for DbgVariableRecords. 6355f757f3fSDimitry Andric if (Block->IsNewDbgInfoFormat) { 6360fca6ea1SDimitry Andric for (DbgVariableRecord &DVR : llvm::make_early_inc_range( 6370fca6ea1SDimitry Andric filterDbgVars(I.getDbgRecordRange()))) { 6380fca6ea1SDimitry Andric DebugVariable Key(DVR.getVariable(), DVR.getExpression(), 6390fca6ea1SDimitry Andric DVR.getDebugLoc().get()); 6405f757f3fSDimitry Andric if (!DeadDebugSet.insert(Key).second) 6415f757f3fSDimitry Andric continue; 6420fca6ea1SDimitry Andric // Unlinks the DVR from it's container, for later insertion. 6430fca6ea1SDimitry Andric DVR.removeFromParent(); 6440fca6ea1SDimitry Andric DeadDbgVariableRecords.push_back(&DVR); 6455f757f3fSDimitry Andric } 6465f757f3fSDimitry Andric } 6475f757f3fSDimitry Andric 6485f757f3fSDimitry Andric // For one of each variable encountered, preserve a debug intrinsic (set 6495f757f3fSDimitry Andric // to Poison) and transfer it to the loop exit. This terminates any 6505f757f3fSDimitry Andric // variable locations that were set during the loop. 6510b57cec5SDimitry Andric auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I); 6520b57cec5SDimitry Andric if (!DVI) 6530b57cec5SDimitry Andric continue; 654bdd1243dSDimitry Andric if (!DeadDebugSet.insert(DebugVariable(DVI)).second) 6550b57cec5SDimitry Andric continue; 6560b57cec5SDimitry Andric DeadDebugInst.push_back(DVI); 6570b57cec5SDimitry Andric } 6580b57cec5SDimitry Andric 6590b57cec5SDimitry Andric // After the loop has been deleted all the values defined and modified 66006c3fb27SDimitry Andric // inside the loop are going to be unavailable. Values computed in the 66106c3fb27SDimitry Andric // loop will have been deleted, automatically causing their debug uses 66206c3fb27SDimitry Andric // be be replaced with undef. Loop invariant values will still be available. 66306c3fb27SDimitry Andric // Move dbg.values out the loop so that earlier location ranges are still 66406c3fb27SDimitry Andric // terminated and loop invariant assignments are preserved. 6655f757f3fSDimitry Andric DIBuilder DIB(*ExitBlock->getModule()); 6665f757f3fSDimitry Andric BasicBlock::iterator InsertDbgValueBefore = 6675f757f3fSDimitry Andric ExitBlock->getFirstInsertionPt(); 6685f757f3fSDimitry Andric assert(InsertDbgValueBefore != ExitBlock->end() && 6690b57cec5SDimitry Andric "There should be a non-PHI instruction in exit block, else these " 6700b57cec5SDimitry Andric "instructions will have no parent."); 6715f757f3fSDimitry Andric 67206c3fb27SDimitry Andric for (auto *DVI : DeadDebugInst) 6735f757f3fSDimitry Andric DVI->moveBefore(*ExitBlock, InsertDbgValueBefore); 6745f757f3fSDimitry Andric 6755f757f3fSDimitry Andric // Due to the "head" bit in BasicBlock::iterator, we're going to insert 6760fca6ea1SDimitry Andric // each DbgVariableRecord right at the start of the block, wheras dbg.values 6770fca6ea1SDimitry Andric // would be repeatedly inserted before the first instruction. To replicate 6780fca6ea1SDimitry Andric // this behaviour, do it backwards. 6790fca6ea1SDimitry Andric for (DbgVariableRecord *DVR : llvm::reverse(DeadDbgVariableRecords)) 6800fca6ea1SDimitry Andric ExitBlock->insertDbgRecordBefore(DVR, InsertDbgValueBefore); 681bdd1243dSDimitry Andric } 6820b57cec5SDimitry Andric 6830b57cec5SDimitry Andric // Remove the block from the reference counting scheme, so that we can 6840b57cec5SDimitry Andric // delete it freely later. 6850b57cec5SDimitry Andric for (auto *Block : L->blocks()) 6860b57cec5SDimitry Andric Block->dropAllReferences(); 6870b57cec5SDimitry Andric 6885ffd83dbSDimitry Andric if (MSSA && VerifyMemorySSA) 6895ffd83dbSDimitry Andric MSSA->verifyMemorySSA(); 6905ffd83dbSDimitry Andric 6910b57cec5SDimitry Andric if (LI) { 6920b57cec5SDimitry Andric // Erase the instructions and the blocks without having to worry 6930b57cec5SDimitry Andric // about ordering because we already dropped the references. 6940b57cec5SDimitry Andric // NOTE: This iteration is safe because erasing the block does not remove 6950b57cec5SDimitry Andric // its entry from the loop's block list. We do that in the next section. 6965e801ac6SDimitry Andric for (BasicBlock *BB : L->blocks()) 6975e801ac6SDimitry Andric BB->eraseFromParent(); 6980b57cec5SDimitry Andric 6990b57cec5SDimitry Andric // Finally, the blocks from loopinfo. This has to happen late because 7000b57cec5SDimitry Andric // otherwise our loop iterators won't work. 7010b57cec5SDimitry Andric 7020b57cec5SDimitry Andric SmallPtrSet<BasicBlock *, 8> blocks; 7030b57cec5SDimitry Andric blocks.insert(L->block_begin(), L->block_end()); 7040b57cec5SDimitry Andric for (BasicBlock *BB : blocks) 7050b57cec5SDimitry Andric LI->removeBlock(BB); 7060b57cec5SDimitry Andric 7070b57cec5SDimitry Andric // The last step is to update LoopInfo now that we've eliminated this loop. 708480093f4SDimitry Andric // Note: LoopInfo::erase remove the given loop and relink its subloops with 709480093f4SDimitry Andric // its parent. While removeLoop/removeChildLoop remove the given loop but 710480093f4SDimitry Andric // not relink its subloops, which is what we want. 711480093f4SDimitry Andric if (Loop *ParentLoop = L->getParentLoop()) { 7125ffd83dbSDimitry Andric Loop::iterator I = find(*ParentLoop, L); 713480093f4SDimitry Andric assert(I != ParentLoop->end() && "Couldn't find loop"); 714480093f4SDimitry Andric ParentLoop->removeChildLoop(I); 715480093f4SDimitry Andric } else { 7165ffd83dbSDimitry Andric Loop::iterator I = find(*LI, L); 717480093f4SDimitry Andric assert(I != LI->end() && "Couldn't find loop"); 718480093f4SDimitry Andric LI->removeLoop(I); 719480093f4SDimitry Andric } 720480093f4SDimitry Andric LI->destroy(L); 7210b57cec5SDimitry Andric } 7220b57cec5SDimitry Andric } 7230b57cec5SDimitry Andric 724e8d8bef9SDimitry Andric void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, 725e8d8bef9SDimitry Andric LoopInfo &LI, MemorySSA *MSSA) { 726e8d8bef9SDimitry Andric auto *Latch = L->getLoopLatch(); 727e8d8bef9SDimitry Andric assert(Latch && "multiple latches not yet supported"); 728e8d8bef9SDimitry Andric auto *Header = L->getHeader(); 72981ad6265SDimitry Andric Loop *OutermostLoop = L->getOutermostLoop(); 730e8d8bef9SDimitry Andric 731e8d8bef9SDimitry Andric SE.forgetLoop(L); 732bdd1243dSDimitry Andric SE.forgetBlockAndLoopDispositions(); 733e8d8bef9SDimitry Andric 734e8d8bef9SDimitry Andric std::unique_ptr<MemorySSAUpdater> MSSAU; 735e8d8bef9SDimitry Andric if (MSSA) 736e8d8bef9SDimitry Andric MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); 737e8d8bef9SDimitry Andric 738349cc55cSDimitry Andric // Update the CFG and domtree. We chose to special case a couple of 739349cc55cSDimitry Andric // of common cases for code quality and test readability reasons. 740349cc55cSDimitry Andric [&]() -> void { 741349cc55cSDimitry Andric if (auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) { 742349cc55cSDimitry Andric if (!BI->isConditional()) { 743349cc55cSDimitry Andric DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager); 744349cc55cSDimitry Andric (void)changeToUnreachable(BI, /*PreserveLCSSA*/ true, &DTU, 745349cc55cSDimitry Andric MSSAU.get()); 746349cc55cSDimitry Andric return; 747349cc55cSDimitry Andric } 748349cc55cSDimitry Andric 749349cc55cSDimitry Andric // Conditional latch/exit - note that latch can be shared by inner 750349cc55cSDimitry Andric // and outer loop so the other target doesn't need to an exit 751349cc55cSDimitry Andric if (L->isLoopExiting(Latch)) { 752349cc55cSDimitry Andric // TODO: Generalize ConstantFoldTerminator so that it can be used 753349cc55cSDimitry Andric // here without invalidating LCSSA or MemorySSA. (Tricky case for 754349cc55cSDimitry Andric // LCSSA: header is an exit block of a preceeding sibling loop w/o 755349cc55cSDimitry Andric // dedicated exits.) 756349cc55cSDimitry Andric const unsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 1 : 0; 757349cc55cSDimitry Andric BasicBlock *ExitBB = BI->getSuccessor(ExitIdx); 758349cc55cSDimitry Andric 759349cc55cSDimitry Andric DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager); 760349cc55cSDimitry Andric Header->removePredecessor(Latch, true); 761349cc55cSDimitry Andric 762349cc55cSDimitry Andric IRBuilder<> Builder(BI); 763349cc55cSDimitry Andric auto *NewBI = Builder.CreateBr(ExitBB); 764349cc55cSDimitry Andric // Transfer the metadata to the new branch instruction (minus the 765349cc55cSDimitry Andric // loop info since this is no longer a loop) 766349cc55cSDimitry Andric NewBI->copyMetadata(*BI, {LLVMContext::MD_dbg, 767349cc55cSDimitry Andric LLVMContext::MD_annotation}); 768349cc55cSDimitry Andric 769349cc55cSDimitry Andric BI->eraseFromParent(); 770349cc55cSDimitry Andric DTU.applyUpdates({{DominatorTree::Delete, Latch, Header}}); 771349cc55cSDimitry Andric if (MSSA) 772349cc55cSDimitry Andric MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT); 773349cc55cSDimitry Andric return; 774349cc55cSDimitry Andric } 775349cc55cSDimitry Andric } 776349cc55cSDimitry Andric 777349cc55cSDimitry Andric // General case. By splitting the backedge, and then explicitly making it 778349cc55cSDimitry Andric // unreachable we gracefully handle corner cases such as switch and invoke 779349cc55cSDimitry Andric // termiantors. 780e8d8bef9SDimitry Andric auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get()); 781e8d8bef9SDimitry Andric 782e8d8bef9SDimitry Andric DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager); 783fe6060f1SDimitry Andric (void)changeToUnreachable(BackedgeBB->getTerminator(), 784e8d8bef9SDimitry Andric /*PreserveLCSSA*/ true, &DTU, MSSAU.get()); 785349cc55cSDimitry Andric }(); 786e8d8bef9SDimitry Andric 787e8d8bef9SDimitry Andric // Erase (and destroy) this loop instance. Handles relinking sub-loops 788e8d8bef9SDimitry Andric // and blocks within the loop as needed. 789e8d8bef9SDimitry Andric LI.erase(L); 790e8d8bef9SDimitry Andric 791e8d8bef9SDimitry Andric // If the loop we broke had a parent, then changeToUnreachable might have 792e8d8bef9SDimitry Andric // caused a block to be removed from the parent loop (see loop_nest_lcssa 793e8d8bef9SDimitry Andric // test case in zero-btc.ll for an example), thus changing the parent's 794e8d8bef9SDimitry Andric // exit blocks. If that happened, we need to rebuild LCSSA on the outermost 795e8d8bef9SDimitry Andric // loop which might have a had a block removed. 796e8d8bef9SDimitry Andric if (OutermostLoop != L) 797e8d8bef9SDimitry Andric formLCSSARecursively(*OutermostLoop, DT, &LI, &SE); 798e8d8bef9SDimitry Andric } 799e8d8bef9SDimitry Andric 800e8d8bef9SDimitry Andric 8010eae32dcSDimitry Andric /// Checks if \p L has an exiting latch branch. There may also be other 8020eae32dcSDimitry Andric /// exiting blocks. Returns branch instruction terminating the loop 8035ffd83dbSDimitry Andric /// latch if above check is successful, nullptr otherwise. 8045ffd83dbSDimitry Andric static BranchInst *getExpectedExitLoopLatchBranch(Loop *L) { 8050b57cec5SDimitry Andric BasicBlock *Latch = L->getLoopLatch(); 8060b57cec5SDimitry Andric if (!Latch) 8075ffd83dbSDimitry Andric return nullptr; 8085ffd83dbSDimitry Andric 8090b57cec5SDimitry Andric BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator()); 8100b57cec5SDimitry Andric if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch)) 8115ffd83dbSDimitry Andric return nullptr; 8120b57cec5SDimitry Andric 8130b57cec5SDimitry Andric assert((LatchBR->getSuccessor(0) == L->getHeader() || 8140b57cec5SDimitry Andric LatchBR->getSuccessor(1) == L->getHeader()) && 8150b57cec5SDimitry Andric "At least one edge out of the latch must go to the header"); 8160b57cec5SDimitry Andric 8175ffd83dbSDimitry Andric return LatchBR; 8185ffd83dbSDimitry Andric } 8195ffd83dbSDimitry Andric 8200eae32dcSDimitry Andric /// Return the estimated trip count for any exiting branch which dominates 8210eae32dcSDimitry Andric /// the loop latch. 822bdd1243dSDimitry Andric static std::optional<uint64_t> getEstimatedTripCount(BranchInst *ExitingBranch, 823bdd1243dSDimitry Andric Loop *L, 8240eae32dcSDimitry Andric uint64_t &OrigExitWeight) { 8250eae32dcSDimitry Andric // To estimate the number of times the loop body was executed, we want to 8260eae32dcSDimitry Andric // know the number of times the backedge was taken, vs. the number of times 8270eae32dcSDimitry Andric // we exited the loop. 8280eae32dcSDimitry Andric uint64_t LoopWeight, ExitWeight; 829bdd1243dSDimitry Andric if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight)) 830bdd1243dSDimitry Andric return std::nullopt; 8310eae32dcSDimitry Andric 8320eae32dcSDimitry Andric if (L->contains(ExitingBranch->getSuccessor(1))) 8330eae32dcSDimitry Andric std::swap(LoopWeight, ExitWeight); 8340eae32dcSDimitry Andric 8350eae32dcSDimitry Andric if (!ExitWeight) 8360eae32dcSDimitry Andric // Don't have a way to return predicated infinite 837bdd1243dSDimitry Andric return std::nullopt; 8380eae32dcSDimitry Andric 8390eae32dcSDimitry Andric OrigExitWeight = ExitWeight; 8400eae32dcSDimitry Andric 8410eae32dcSDimitry Andric // Estimated exit count is a ratio of the loop weight by the weight of the 8420eae32dcSDimitry Andric // edge exiting the loop, rounded to nearest. 8430eae32dcSDimitry Andric uint64_t ExitCount = llvm::divideNearest(LoopWeight, ExitWeight); 8440eae32dcSDimitry Andric // Estimated trip count is one plus estimated exit count. 8450eae32dcSDimitry Andric return ExitCount + 1; 8460eae32dcSDimitry Andric } 8470eae32dcSDimitry Andric 848bdd1243dSDimitry Andric std::optional<unsigned> 8495ffd83dbSDimitry Andric llvm::getLoopEstimatedTripCount(Loop *L, 8505ffd83dbSDimitry Andric unsigned *EstimatedLoopInvocationWeight) { 8510eae32dcSDimitry Andric // Currently we take the estimate exit count only from the loop latch, 8520eae32dcSDimitry Andric // ignoring other exiting blocks. This can overestimate the trip count 8530eae32dcSDimitry Andric // if we exit through another exit, but can never underestimate it. 8540eae32dcSDimitry Andric // TODO: incorporate information from other exits 8550eae32dcSDimitry Andric if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) { 8560eae32dcSDimitry Andric uint64_t ExitWeight; 857bdd1243dSDimitry Andric if (std::optional<uint64_t> EstTripCount = 8580eae32dcSDimitry Andric getEstimatedTripCount(LatchBranch, L, ExitWeight)) { 8595ffd83dbSDimitry Andric if (EstimatedLoopInvocationWeight) 8600eae32dcSDimitry Andric *EstimatedLoopInvocationWeight = ExitWeight; 8610eae32dcSDimitry Andric return *EstTripCount; 8620eae32dcSDimitry Andric } 8630eae32dcSDimitry Andric } 864bdd1243dSDimitry Andric return std::nullopt; 8655ffd83dbSDimitry Andric } 8665ffd83dbSDimitry Andric 8675ffd83dbSDimitry Andric bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, 8685ffd83dbSDimitry Andric unsigned EstimatedloopInvocationWeight) { 8690eae32dcSDimitry Andric // At the moment, we currently support changing the estimate trip count of 8700eae32dcSDimitry Andric // the latch branch only. We could extend this API to manipulate estimated 8710eae32dcSDimitry Andric // trip counts for any exit. 8725ffd83dbSDimitry Andric BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L); 8735ffd83dbSDimitry Andric if (!LatchBranch) 8745ffd83dbSDimitry Andric return false; 8755ffd83dbSDimitry Andric 8765ffd83dbSDimitry Andric // Calculate taken and exit weights. 8775ffd83dbSDimitry Andric unsigned LatchExitWeight = 0; 8785ffd83dbSDimitry Andric unsigned BackedgeTakenWeight = 0; 8795ffd83dbSDimitry Andric 8805ffd83dbSDimitry Andric if (EstimatedTripCount > 0) { 8815ffd83dbSDimitry Andric LatchExitWeight = EstimatedloopInvocationWeight; 8825ffd83dbSDimitry Andric BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight; 8835ffd83dbSDimitry Andric } 8845ffd83dbSDimitry Andric 8855ffd83dbSDimitry Andric // Make a swap if back edge is taken when condition is "false". 8865ffd83dbSDimitry Andric if (LatchBranch->getSuccessor(0) != L->getHeader()) 8875ffd83dbSDimitry Andric std::swap(BackedgeTakenWeight, LatchExitWeight); 8885ffd83dbSDimitry Andric 8895ffd83dbSDimitry Andric MDBuilder MDB(LatchBranch->getContext()); 8905ffd83dbSDimitry Andric 8915ffd83dbSDimitry Andric // Set/Update profile metadata. 8925ffd83dbSDimitry Andric LatchBranch->setMetadata( 8935ffd83dbSDimitry Andric LLVMContext::MD_prof, 8945ffd83dbSDimitry Andric MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight)); 8955ffd83dbSDimitry Andric 8965ffd83dbSDimitry Andric return true; 8970b57cec5SDimitry Andric } 8980b57cec5SDimitry Andric 8990b57cec5SDimitry Andric bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop, 9000b57cec5SDimitry Andric ScalarEvolution &SE) { 9010b57cec5SDimitry Andric Loop *OuterL = InnerLoop->getParentLoop(); 9020b57cec5SDimitry Andric if (!OuterL) 9030b57cec5SDimitry Andric return true; 9040b57cec5SDimitry Andric 9050b57cec5SDimitry Andric // Get the backedge taken count for the inner loop 9060b57cec5SDimitry Andric BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); 9070b57cec5SDimitry Andric const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch); 9080b57cec5SDimitry Andric if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) || 9090b57cec5SDimitry Andric !InnerLoopBECountSC->getType()->isIntegerTy()) 9100b57cec5SDimitry Andric return false; 9110b57cec5SDimitry Andric 9120b57cec5SDimitry Andric // Get whether count is invariant to the outer loop 9130b57cec5SDimitry Andric ScalarEvolution::LoopDisposition LD = 9140b57cec5SDimitry Andric SE.getLoopDisposition(InnerLoopBECountSC, OuterL); 9150b57cec5SDimitry Andric if (LD != ScalarEvolution::LoopInvariant) 9160b57cec5SDimitry Andric return false; 9170b57cec5SDimitry Andric 9180b57cec5SDimitry Andric return true; 9190b57cec5SDimitry Andric } 9200b57cec5SDimitry Andric 921*52418fc2SDimitry Andric constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) { 922*52418fc2SDimitry Andric switch (RK) { 923*52418fc2SDimitry Andric default: 924*52418fc2SDimitry Andric llvm_unreachable("Unexpected recurrence kind"); 925*52418fc2SDimitry Andric case RecurKind::Add: 926*52418fc2SDimitry Andric return Intrinsic::vector_reduce_add; 927*52418fc2SDimitry Andric case RecurKind::Mul: 928*52418fc2SDimitry Andric return Intrinsic::vector_reduce_mul; 929*52418fc2SDimitry Andric case RecurKind::And: 930*52418fc2SDimitry Andric return Intrinsic::vector_reduce_and; 931*52418fc2SDimitry Andric case RecurKind::Or: 932*52418fc2SDimitry Andric return Intrinsic::vector_reduce_or; 933*52418fc2SDimitry Andric case RecurKind::Xor: 934*52418fc2SDimitry Andric return Intrinsic::vector_reduce_xor; 935*52418fc2SDimitry Andric case RecurKind::FMulAdd: 936*52418fc2SDimitry Andric case RecurKind::FAdd: 937*52418fc2SDimitry Andric return Intrinsic::vector_reduce_fadd; 938*52418fc2SDimitry Andric case RecurKind::FMul: 939*52418fc2SDimitry Andric return Intrinsic::vector_reduce_fmul; 940*52418fc2SDimitry Andric case RecurKind::SMax: 941*52418fc2SDimitry Andric return Intrinsic::vector_reduce_smax; 942*52418fc2SDimitry Andric case RecurKind::SMin: 943*52418fc2SDimitry Andric return Intrinsic::vector_reduce_smin; 944*52418fc2SDimitry Andric case RecurKind::UMax: 945*52418fc2SDimitry Andric return Intrinsic::vector_reduce_umax; 946*52418fc2SDimitry Andric case RecurKind::UMin: 947*52418fc2SDimitry Andric return Intrinsic::vector_reduce_umin; 948*52418fc2SDimitry Andric case RecurKind::FMax: 949*52418fc2SDimitry Andric return Intrinsic::vector_reduce_fmax; 950*52418fc2SDimitry Andric case RecurKind::FMin: 951*52418fc2SDimitry Andric return Intrinsic::vector_reduce_fmin; 952*52418fc2SDimitry Andric case RecurKind::FMaximum: 953*52418fc2SDimitry Andric return Intrinsic::vector_reduce_fmaximum; 954*52418fc2SDimitry Andric case RecurKind::FMinimum: 955*52418fc2SDimitry Andric return Intrinsic::vector_reduce_fminimum; 956*52418fc2SDimitry Andric } 957*52418fc2SDimitry Andric } 958*52418fc2SDimitry Andric 9590fca6ea1SDimitry Andric unsigned llvm::getArithmeticReductionInstruction(Intrinsic::ID RdxID) { 9600fca6ea1SDimitry Andric switch (RdxID) { 9610fca6ea1SDimitry Andric case Intrinsic::vector_reduce_fadd: 9620fca6ea1SDimitry Andric return Instruction::FAdd; 9630fca6ea1SDimitry Andric case Intrinsic::vector_reduce_fmul: 9640fca6ea1SDimitry Andric return Instruction::FMul; 9650fca6ea1SDimitry Andric case Intrinsic::vector_reduce_add: 9660fca6ea1SDimitry Andric return Instruction::Add; 9670fca6ea1SDimitry Andric case Intrinsic::vector_reduce_mul: 9680fca6ea1SDimitry Andric return Instruction::Mul; 9690fca6ea1SDimitry Andric case Intrinsic::vector_reduce_and: 9700fca6ea1SDimitry Andric return Instruction::And; 9710fca6ea1SDimitry Andric case Intrinsic::vector_reduce_or: 9720fca6ea1SDimitry Andric return Instruction::Or; 9730fca6ea1SDimitry Andric case Intrinsic::vector_reduce_xor: 9740fca6ea1SDimitry Andric return Instruction::Xor; 9750fca6ea1SDimitry Andric case Intrinsic::vector_reduce_smax: 9760fca6ea1SDimitry Andric case Intrinsic::vector_reduce_smin: 9770fca6ea1SDimitry Andric case Intrinsic::vector_reduce_umax: 9780fca6ea1SDimitry Andric case Intrinsic::vector_reduce_umin: 9790fca6ea1SDimitry Andric return Instruction::ICmp; 9800fca6ea1SDimitry Andric case Intrinsic::vector_reduce_fmax: 9810fca6ea1SDimitry Andric case Intrinsic::vector_reduce_fmin: 9820fca6ea1SDimitry Andric return Instruction::FCmp; 9830fca6ea1SDimitry Andric default: 9840fca6ea1SDimitry Andric llvm_unreachable("Unexpected ID"); 9850fca6ea1SDimitry Andric } 9860fca6ea1SDimitry Andric } 9870fca6ea1SDimitry Andric 9880fca6ea1SDimitry Andric Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID) { 9890fca6ea1SDimitry Andric switch (RdxID) { 9900fca6ea1SDimitry Andric default: 9910fca6ea1SDimitry Andric llvm_unreachable("Unknown min/max recurrence kind"); 9920fca6ea1SDimitry Andric case Intrinsic::vector_reduce_umin: 9930fca6ea1SDimitry Andric return Intrinsic::umin; 9940fca6ea1SDimitry Andric case Intrinsic::vector_reduce_umax: 9950fca6ea1SDimitry Andric return Intrinsic::umax; 9960fca6ea1SDimitry Andric case Intrinsic::vector_reduce_smin: 9970fca6ea1SDimitry Andric return Intrinsic::smin; 9980fca6ea1SDimitry Andric case Intrinsic::vector_reduce_smax: 9990fca6ea1SDimitry Andric return Intrinsic::smax; 10000fca6ea1SDimitry Andric case Intrinsic::vector_reduce_fmin: 10010fca6ea1SDimitry Andric return Intrinsic::minnum; 10020fca6ea1SDimitry Andric case Intrinsic::vector_reduce_fmax: 10030fca6ea1SDimitry Andric return Intrinsic::maxnum; 10040fca6ea1SDimitry Andric case Intrinsic::vector_reduce_fminimum: 10050fca6ea1SDimitry Andric return Intrinsic::minimum; 10060fca6ea1SDimitry Andric case Intrinsic::vector_reduce_fmaximum: 10070fca6ea1SDimitry Andric return Intrinsic::maximum; 10080fca6ea1SDimitry Andric } 10090fca6ea1SDimitry Andric } 10100fca6ea1SDimitry Andric 101106c3fb27SDimitry Andric Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) { 101206c3fb27SDimitry Andric switch (RK) { 101306c3fb27SDimitry Andric default: 101406c3fb27SDimitry Andric llvm_unreachable("Unknown min/max recurrence kind"); 101506c3fb27SDimitry Andric case RecurKind::UMin: 101606c3fb27SDimitry Andric return Intrinsic::umin; 101706c3fb27SDimitry Andric case RecurKind::UMax: 101806c3fb27SDimitry Andric return Intrinsic::umax; 101906c3fb27SDimitry Andric case RecurKind::SMin: 102006c3fb27SDimitry Andric return Intrinsic::smin; 102106c3fb27SDimitry Andric case RecurKind::SMax: 102206c3fb27SDimitry Andric return Intrinsic::smax; 102306c3fb27SDimitry Andric case RecurKind::FMin: 102406c3fb27SDimitry Andric return Intrinsic::minnum; 102506c3fb27SDimitry Andric case RecurKind::FMax: 102606c3fb27SDimitry Andric return Intrinsic::maxnum; 102706c3fb27SDimitry Andric case RecurKind::FMinimum: 102806c3fb27SDimitry Andric return Intrinsic::minimum; 102906c3fb27SDimitry Andric case RecurKind::FMaximum: 103006c3fb27SDimitry Andric return Intrinsic::maximum; 103106c3fb27SDimitry Andric } 103206c3fb27SDimitry Andric } 103306c3fb27SDimitry Andric 10340fca6ea1SDimitry Andric RecurKind llvm::getMinMaxReductionRecurKind(Intrinsic::ID RdxID) { 10350fca6ea1SDimitry Andric switch (RdxID) { 10360fca6ea1SDimitry Andric case Intrinsic::vector_reduce_smax: 10370fca6ea1SDimitry Andric return RecurKind::SMax; 10380fca6ea1SDimitry Andric case Intrinsic::vector_reduce_smin: 10390fca6ea1SDimitry Andric return RecurKind::SMin; 10400fca6ea1SDimitry Andric case Intrinsic::vector_reduce_umax: 10410fca6ea1SDimitry Andric return RecurKind::UMax; 10420fca6ea1SDimitry Andric case Intrinsic::vector_reduce_umin: 10430fca6ea1SDimitry Andric return RecurKind::UMin; 10440fca6ea1SDimitry Andric case Intrinsic::vector_reduce_fmax: 10450fca6ea1SDimitry Andric return RecurKind::FMax; 10460fca6ea1SDimitry Andric case Intrinsic::vector_reduce_fmin: 10470fca6ea1SDimitry Andric return RecurKind::FMin; 10480fca6ea1SDimitry Andric default: 10490fca6ea1SDimitry Andric return RecurKind::None; 10500fca6ea1SDimitry Andric } 10510fca6ea1SDimitry Andric } 10520fca6ea1SDimitry Andric 1053349cc55cSDimitry Andric CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) { 10540b57cec5SDimitry Andric switch (RK) { 10550b57cec5SDimitry Andric default: 10560b57cec5SDimitry Andric llvm_unreachable("Unknown min/max recurrence kind"); 1057e8d8bef9SDimitry Andric case RecurKind::UMin: 1058349cc55cSDimitry Andric return CmpInst::ICMP_ULT; 1059e8d8bef9SDimitry Andric case RecurKind::UMax: 1060349cc55cSDimitry Andric return CmpInst::ICMP_UGT; 1061e8d8bef9SDimitry Andric case RecurKind::SMin: 1062349cc55cSDimitry Andric return CmpInst::ICMP_SLT; 1063e8d8bef9SDimitry Andric case RecurKind::SMax: 1064349cc55cSDimitry Andric return CmpInst::ICMP_SGT; 1065e8d8bef9SDimitry Andric case RecurKind::FMin: 1066349cc55cSDimitry Andric return CmpInst::FCMP_OLT; 1067e8d8bef9SDimitry Andric case RecurKind::FMax: 1068349cc55cSDimitry Andric return CmpInst::FCMP_OGT; 106906c3fb27SDimitry Andric // We do not add FMinimum/FMaximum recurrence kind here since there is no 107006c3fb27SDimitry Andric // equivalent predicate which compares signed zeroes according to the 107106c3fb27SDimitry Andric // semantics of the intrinsics (llvm.minimum/maximum). 1072349cc55cSDimitry Andric } 10730b57cec5SDimitry Andric } 10740b57cec5SDimitry Andric 1075349cc55cSDimitry Andric Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, 1076349cc55cSDimitry Andric Value *Right) { 107706c3fb27SDimitry Andric Type *Ty = Left->getType(); 107806c3fb27SDimitry Andric if (Ty->isIntOrIntVectorTy() || 107906c3fb27SDimitry Andric (RK == RecurKind::FMinimum || RK == RecurKind::FMaximum)) { 108006c3fb27SDimitry Andric // TODO: Add float minnum/maxnum support when FMF nnan is set. 108106c3fb27SDimitry Andric Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK); 108206c3fb27SDimitry Andric return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr, 108306c3fb27SDimitry Andric "rdx.minmax"); 108406c3fb27SDimitry Andric } 1085349cc55cSDimitry Andric CmpInst::Predicate Pred = getMinMaxReductionPredicate(RK); 1086e8d8bef9SDimitry Andric Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp"); 10870b57cec5SDimitry Andric Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); 10880b57cec5SDimitry Andric return Select; 10890b57cec5SDimitry Andric } 10900b57cec5SDimitry Andric 10910b57cec5SDimitry Andric // Helper to generate an ordered reduction. 1092e8d8bef9SDimitry Andric Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, 10930eae32dcSDimitry Andric unsigned Op, RecurKind RdxKind) { 10945ffd83dbSDimitry Andric unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements(); 10950b57cec5SDimitry Andric 10960b57cec5SDimitry Andric // Extract and apply reduction ops in ascending order: 10970b57cec5SDimitry Andric // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1] 10980b57cec5SDimitry Andric Value *Result = Acc; 10990b57cec5SDimitry Andric for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) { 11000b57cec5SDimitry Andric Value *Ext = 11010b57cec5SDimitry Andric Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx)); 11020b57cec5SDimitry Andric 11030b57cec5SDimitry Andric if (Op != Instruction::ICmp && Op != Instruction::FCmp) { 11040b57cec5SDimitry Andric Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext, 11050b57cec5SDimitry Andric "bin.rdx"); 11060b57cec5SDimitry Andric } else { 1107e8d8bef9SDimitry Andric assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) && 11080b57cec5SDimitry Andric "Invalid min/max"); 1109e8d8bef9SDimitry Andric Result = createMinMaxOp(Builder, RdxKind, Result, Ext); 11100b57cec5SDimitry Andric } 11110b57cec5SDimitry Andric } 11120b57cec5SDimitry Andric 11130b57cec5SDimitry Andric return Result; 11140b57cec5SDimitry Andric } 11150b57cec5SDimitry Andric 11160b57cec5SDimitry Andric // Helper to generate a log2 shuffle reduction. 1117e8d8bef9SDimitry Andric Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, 11180fca6ea1SDimitry Andric unsigned Op, 11190fca6ea1SDimitry Andric TargetTransformInfo::ReductionShuffle RS, 11200fca6ea1SDimitry Andric RecurKind RdxKind) { 11215ffd83dbSDimitry Andric unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements(); 11220b57cec5SDimitry Andric // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles 11230b57cec5SDimitry Andric // and vector ops, reducing the set of values being computed by half each 11240b57cec5SDimitry Andric // round. 11250b57cec5SDimitry Andric assert(isPowerOf2_32(VF) && 11260b57cec5SDimitry Andric "Reduction emission only supported for pow2 vectors!"); 11270eae32dcSDimitry Andric // Note: fast-math-flags flags are controlled by the builder configuration 11280eae32dcSDimitry Andric // and are assumed to apply to all generated arithmetic instructions. Other 11290eae32dcSDimitry Andric // poison generating flags (nsw/nuw/inbounds/inrange/exact) are not part 11300eae32dcSDimitry Andric // of the builder configuration, and since they're not passed explicitly, 11310eae32dcSDimitry Andric // will never be relevant here. Note that it would be generally unsound to 11320eae32dcSDimitry Andric // propagate these from an intrinsic call to the expansion anyways as we/ 11330eae32dcSDimitry Andric // change the order of operations. 11340fca6ea1SDimitry Andric auto BuildShuffledOp = [&Builder, &Op, 11350fca6ea1SDimitry Andric &RdxKind](SmallVectorImpl<int> &ShuffleMask, 11360fca6ea1SDimitry Andric Value *&TmpVec) -> void { 11370fca6ea1SDimitry Andric Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf"); 11380fca6ea1SDimitry Andric if (Op != Instruction::ICmp && Op != Instruction::FCmp) { 11390fca6ea1SDimitry Andric TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf, 11400fca6ea1SDimitry Andric "bin.rdx"); 11410fca6ea1SDimitry Andric } else { 11420fca6ea1SDimitry Andric assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) && 11430fca6ea1SDimitry Andric "Invalid min/max"); 11440fca6ea1SDimitry Andric TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf); 11450fca6ea1SDimitry Andric } 11460fca6ea1SDimitry Andric }; 11470fca6ea1SDimitry Andric 11480b57cec5SDimitry Andric Value *TmpVec = Src; 11490fca6ea1SDimitry Andric if (TargetTransformInfo::ReductionShuffle::Pairwise == RS) { 11500fca6ea1SDimitry Andric SmallVector<int, 32> ShuffleMask(VF); 11510fca6ea1SDimitry Andric for (unsigned stride = 1; stride < VF; stride <<= 1) { 11520fca6ea1SDimitry Andric // Initialise the mask with undef. 11530fca6ea1SDimitry Andric std::fill(ShuffleMask.begin(), ShuffleMask.end(), -1); 11540fca6ea1SDimitry Andric for (unsigned j = 0; j < VF; j += stride << 1) { 11550fca6ea1SDimitry Andric ShuffleMask[j] = j + stride; 11560fca6ea1SDimitry Andric } 11570fca6ea1SDimitry Andric BuildShuffledOp(ShuffleMask, TmpVec); 11580fca6ea1SDimitry Andric } 11590fca6ea1SDimitry Andric } else { 11605ffd83dbSDimitry Andric SmallVector<int, 32> ShuffleMask(VF); 11610b57cec5SDimitry Andric for (unsigned i = VF; i != 1; i >>= 1) { 11620b57cec5SDimitry Andric // Move the upper half of the vector to the lower half. 11630b57cec5SDimitry Andric for (unsigned j = 0; j != i / 2; ++j) 11645ffd83dbSDimitry Andric ShuffleMask[j] = i / 2 + j; 11650b57cec5SDimitry Andric 11660b57cec5SDimitry Andric // Fill the rest of the mask with undef. 11675ffd83dbSDimitry Andric std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1); 11680fca6ea1SDimitry Andric BuildShuffledOp(ShuffleMask, TmpVec); 11690b57cec5SDimitry Andric } 11700b57cec5SDimitry Andric } 11710b57cec5SDimitry Andric // The result is in the first element of the vector. 11720b57cec5SDimitry Andric return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); 11730b57cec5SDimitry Andric } 11740b57cec5SDimitry Andric 11755f757f3fSDimitry Andric Value *llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value *Src, 1176349cc55cSDimitry Andric const RecurrenceDescriptor &Desc, 1177349cc55cSDimitry Andric PHINode *OrigPhi) { 11785f757f3fSDimitry Andric assert( 11795f757f3fSDimitry Andric RecurrenceDescriptor::isAnyOfRecurrenceKind(Desc.getRecurrenceKind()) && 1180349cc55cSDimitry Andric "Unexpected reduction kind"); 1181349cc55cSDimitry Andric Value *InitVal = Desc.getRecurrenceStartValue(); 1182349cc55cSDimitry Andric Value *NewVal = nullptr; 1183349cc55cSDimitry Andric 1184349cc55cSDimitry Andric // First use the original phi to determine the new value we're trying to 1185349cc55cSDimitry Andric // select from in the loop. 1186349cc55cSDimitry Andric SelectInst *SI = nullptr; 1187349cc55cSDimitry Andric for (auto *U : OrigPhi->users()) { 1188349cc55cSDimitry Andric if ((SI = dyn_cast<SelectInst>(U))) 1189349cc55cSDimitry Andric break; 1190349cc55cSDimitry Andric } 1191349cc55cSDimitry Andric assert(SI && "One user of the original phi should be a select"); 1192349cc55cSDimitry Andric 1193349cc55cSDimitry Andric if (SI->getTrueValue() == OrigPhi) 1194349cc55cSDimitry Andric NewVal = SI->getFalseValue(); 1195349cc55cSDimitry Andric else { 1196349cc55cSDimitry Andric assert(SI->getFalseValue() == OrigPhi && 1197349cc55cSDimitry Andric "At least one input to the select should be the original Phi"); 1198349cc55cSDimitry Andric NewVal = SI->getTrueValue(); 1199349cc55cSDimitry Andric } 1200349cc55cSDimitry Andric 1201349cc55cSDimitry Andric // If any predicate is true it means that we want to select the new value. 12020fca6ea1SDimitry Andric Value *AnyOf = 12030fca6ea1SDimitry Andric Src->getType()->isVectorTy() ? Builder.CreateOrReduce(Src) : Src; 12040fca6ea1SDimitry Andric // The compares in the loop may yield poison, which propagates through the 12050fca6ea1SDimitry Andric // bitwise ORs. Freeze it here before the condition is used. 12060fca6ea1SDimitry Andric AnyOf = Builder.CreateFreeze(AnyOf); 12070fca6ea1SDimitry Andric return Builder.CreateSelect(AnyOf, NewVal, InitVal, "rdx.select"); 1208349cc55cSDimitry Andric } 1209349cc55cSDimitry Andric 12105f757f3fSDimitry Andric Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src, 12115f757f3fSDimitry Andric RecurKind RdxKind) { 1212e8d8bef9SDimitry Andric auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType(); 1213e8d8bef9SDimitry Andric switch (RdxKind) { 1214e8d8bef9SDimitry Andric case RecurKind::Add: 1215e8d8bef9SDimitry Andric return Builder.CreateAddReduce(Src); 1216e8d8bef9SDimitry Andric case RecurKind::Mul: 1217e8d8bef9SDimitry Andric return Builder.CreateMulReduce(Src); 1218e8d8bef9SDimitry Andric case RecurKind::And: 1219e8d8bef9SDimitry Andric return Builder.CreateAndReduce(Src); 1220e8d8bef9SDimitry Andric case RecurKind::Or: 1221e8d8bef9SDimitry Andric return Builder.CreateOrReduce(Src); 1222e8d8bef9SDimitry Andric case RecurKind::Xor: 1223e8d8bef9SDimitry Andric return Builder.CreateXorReduce(Src); 12244824e7fdSDimitry Andric case RecurKind::FMulAdd: 1225e8d8bef9SDimitry Andric case RecurKind::FAdd: 1226e8d8bef9SDimitry Andric return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy), 1227e8d8bef9SDimitry Andric Src); 1228e8d8bef9SDimitry Andric case RecurKind::FMul: 1229e8d8bef9SDimitry Andric return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src); 1230e8d8bef9SDimitry Andric case RecurKind::SMax: 1231e8d8bef9SDimitry Andric return Builder.CreateIntMaxReduce(Src, true); 1232e8d8bef9SDimitry Andric case RecurKind::SMin: 1233e8d8bef9SDimitry Andric return Builder.CreateIntMinReduce(Src, true); 1234e8d8bef9SDimitry Andric case RecurKind::UMax: 1235e8d8bef9SDimitry Andric return Builder.CreateIntMaxReduce(Src, false); 1236e8d8bef9SDimitry Andric case RecurKind::UMin: 1237e8d8bef9SDimitry Andric return Builder.CreateIntMinReduce(Src, false); 1238e8d8bef9SDimitry Andric case RecurKind::FMax: 1239e8d8bef9SDimitry Andric return Builder.CreateFPMaxReduce(Src); 1240e8d8bef9SDimitry Andric case RecurKind::FMin: 1241e8d8bef9SDimitry Andric return Builder.CreateFPMinReduce(Src); 124206c3fb27SDimitry Andric case RecurKind::FMinimum: 124306c3fb27SDimitry Andric return Builder.CreateFPMinimumReduce(Src); 124406c3fb27SDimitry Andric case RecurKind::FMaximum: 124506c3fb27SDimitry Andric return Builder.CreateFPMaximumReduce(Src); 12460b57cec5SDimitry Andric default: 12470b57cec5SDimitry Andric llvm_unreachable("Unhandled opcode"); 12480b57cec5SDimitry Andric } 12490b57cec5SDimitry Andric } 12500b57cec5SDimitry Andric 12510fca6ea1SDimitry Andric Value *llvm::createSimpleTargetReduction(VectorBuilder &VBuilder, Value *Src, 12520fca6ea1SDimitry Andric const RecurrenceDescriptor &Desc) { 12530fca6ea1SDimitry Andric RecurKind Kind = Desc.getRecurrenceKind(); 12540fca6ea1SDimitry Andric assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) && 12550fca6ea1SDimitry Andric "AnyOf reduction is not supported."); 1256*52418fc2SDimitry Andric Intrinsic::ID Id = getReductionIntrinsicID(Kind); 12570fca6ea1SDimitry Andric auto *SrcTy = cast<VectorType>(Src->getType()); 12580fca6ea1SDimitry Andric Type *SrcEltTy = SrcTy->getElementType(); 12590fca6ea1SDimitry Andric Value *Iden = 12600fca6ea1SDimitry Andric Desc.getRecurrenceIdentity(Kind, SrcEltTy, Desc.getFastMathFlags()); 12610fca6ea1SDimitry Andric Value *Ops[] = {Iden, Src}; 1262*52418fc2SDimitry Andric return VBuilder.createSimpleTargetReduction(Id, SrcTy, Ops); 12630fca6ea1SDimitry Andric } 12640fca6ea1SDimitry Andric 12655ffd83dbSDimitry Andric Value *llvm::createTargetReduction(IRBuilderBase &B, 1266349cc55cSDimitry Andric const RecurrenceDescriptor &Desc, Value *Src, 1267349cc55cSDimitry Andric PHINode *OrigPhi) { 12680b57cec5SDimitry Andric // TODO: Support in-order reductions based on the recurrence descriptor. 12690b57cec5SDimitry Andric // All ops in the reduction inherit fast-math-flags from the recurrence 12700b57cec5SDimitry Andric // descriptor. 12715ffd83dbSDimitry Andric IRBuilderBase::FastMathFlagGuard FMFGuard(B); 12720b57cec5SDimitry Andric B.setFastMathFlags(Desc.getFastMathFlags()); 1273349cc55cSDimitry Andric 1274349cc55cSDimitry Andric RecurKind RK = Desc.getRecurrenceKind(); 12755f757f3fSDimitry Andric if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) 12765f757f3fSDimitry Andric return createAnyOfTargetReduction(B, Src, Desc, OrigPhi); 1277349cc55cSDimitry Andric 12785f757f3fSDimitry Andric return createSimpleTargetReduction(B, Src, RK); 12790b57cec5SDimitry Andric } 12800b57cec5SDimitry Andric 1281fe6060f1SDimitry Andric Value *llvm::createOrderedReduction(IRBuilderBase &B, 1282fe6060f1SDimitry Andric const RecurrenceDescriptor &Desc, 1283fe6060f1SDimitry Andric Value *Src, Value *Start) { 12844824e7fdSDimitry Andric assert((Desc.getRecurrenceKind() == RecurKind::FAdd || 12854824e7fdSDimitry Andric Desc.getRecurrenceKind() == RecurKind::FMulAdd) && 1286fe6060f1SDimitry Andric "Unexpected reduction kind"); 1287fe6060f1SDimitry Andric assert(Src->getType()->isVectorTy() && "Expected a vector type"); 1288fe6060f1SDimitry Andric assert(!Start->getType()->isVectorTy() && "Expected a scalar type"); 1289fe6060f1SDimitry Andric 1290fe6060f1SDimitry Andric return B.CreateFAddReduce(Start, Src); 1291fe6060f1SDimitry Andric } 1292fe6060f1SDimitry Andric 12930fca6ea1SDimitry Andric Value *llvm::createOrderedReduction(VectorBuilder &VBuilder, 12940fca6ea1SDimitry Andric const RecurrenceDescriptor &Desc, 12950fca6ea1SDimitry Andric Value *Src, Value *Start) { 12960fca6ea1SDimitry Andric assert((Desc.getRecurrenceKind() == RecurKind::FAdd || 12970fca6ea1SDimitry Andric Desc.getRecurrenceKind() == RecurKind::FMulAdd) && 12980fca6ea1SDimitry Andric "Unexpected reduction kind"); 12990fca6ea1SDimitry Andric assert(Src->getType()->isVectorTy() && "Expected a vector type"); 13000fca6ea1SDimitry Andric assert(!Start->getType()->isVectorTy() && "Expected a scalar type"); 13010fca6ea1SDimitry Andric 1302*52418fc2SDimitry Andric Intrinsic::ID Id = getReductionIntrinsicID(RecurKind::FAdd); 13030fca6ea1SDimitry Andric auto *SrcTy = cast<VectorType>(Src->getType()); 13040fca6ea1SDimitry Andric Value *Ops[] = {Start, Src}; 1305*52418fc2SDimitry Andric return VBuilder.createSimpleTargetReduction(Id, SrcTy, Ops); 13060fca6ea1SDimitry Andric } 13070fca6ea1SDimitry Andric 130881ad6265SDimitry Andric void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue, 130981ad6265SDimitry Andric bool IncludeWrapFlags) { 13100b57cec5SDimitry Andric auto *VecOp = dyn_cast<Instruction>(I); 13110b57cec5SDimitry Andric if (!VecOp) 13120b57cec5SDimitry Andric return; 13130b57cec5SDimitry Andric auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0]) 13140b57cec5SDimitry Andric : dyn_cast<Instruction>(OpValue); 13150b57cec5SDimitry Andric if (!Intersection) 13160b57cec5SDimitry Andric return; 13170b57cec5SDimitry Andric const unsigned Opcode = Intersection->getOpcode(); 131881ad6265SDimitry Andric VecOp->copyIRFlags(Intersection, IncludeWrapFlags); 13190b57cec5SDimitry Andric for (auto *V : VL) { 13200b57cec5SDimitry Andric auto *Instr = dyn_cast<Instruction>(V); 13210b57cec5SDimitry Andric if (!Instr) 13220b57cec5SDimitry Andric continue; 13230b57cec5SDimitry Andric if (OpValue == nullptr || Opcode == Instr->getOpcode()) 13240b57cec5SDimitry Andric VecOp->andIRFlags(V); 13250b57cec5SDimitry Andric } 13260b57cec5SDimitry Andric } 13270b57cec5SDimitry Andric 13280b57cec5SDimitry Andric bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L, 13290b57cec5SDimitry Andric ScalarEvolution &SE) { 13300b57cec5SDimitry Andric const SCEV *Zero = SE.getZero(S->getType()); 13310b57cec5SDimitry Andric return SE.isAvailableAtLoopEntry(S, L) && 13320b57cec5SDimitry Andric SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero); 13330b57cec5SDimitry Andric } 13340b57cec5SDimitry Andric 13350b57cec5SDimitry Andric bool llvm::isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, 13360b57cec5SDimitry Andric ScalarEvolution &SE) { 13370b57cec5SDimitry Andric const SCEV *Zero = SE.getZero(S->getType()); 13380b57cec5SDimitry Andric return SE.isAvailableAtLoopEntry(S, L) && 13390b57cec5SDimitry Andric SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero); 13400b57cec5SDimitry Andric } 13410b57cec5SDimitry Andric 134206c3fb27SDimitry Andric bool llvm::isKnownPositiveInLoop(const SCEV *S, const Loop *L, 134306c3fb27SDimitry Andric ScalarEvolution &SE) { 134406c3fb27SDimitry Andric const SCEV *Zero = SE.getZero(S->getType()); 134506c3fb27SDimitry Andric return SE.isAvailableAtLoopEntry(S, L) && 134606c3fb27SDimitry Andric SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, S, Zero); 134706c3fb27SDimitry Andric } 134806c3fb27SDimitry Andric 134906c3fb27SDimitry Andric bool llvm::isKnownNonPositiveInLoop(const SCEV *S, const Loop *L, 135006c3fb27SDimitry Andric ScalarEvolution &SE) { 135106c3fb27SDimitry Andric const SCEV *Zero = SE.getZero(S->getType()); 135206c3fb27SDimitry Andric return SE.isAvailableAtLoopEntry(S, L) && 135306c3fb27SDimitry Andric SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLE, S, Zero); 135406c3fb27SDimitry Andric } 135506c3fb27SDimitry Andric 13560b57cec5SDimitry Andric bool llvm::cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, 13570b57cec5SDimitry Andric bool Signed) { 13580b57cec5SDimitry Andric unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth(); 13590b57cec5SDimitry Andric APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) : 13600b57cec5SDimitry Andric APInt::getMinValue(BitWidth); 13610b57cec5SDimitry Andric auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; 13620b57cec5SDimitry Andric return SE.isAvailableAtLoopEntry(S, L) && 13630b57cec5SDimitry Andric SE.isLoopEntryGuardedByCond(L, Predicate, S, 13640b57cec5SDimitry Andric SE.getConstant(Min)); 13650b57cec5SDimitry Andric } 13660b57cec5SDimitry Andric 13670b57cec5SDimitry Andric bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, 13680b57cec5SDimitry Andric bool Signed) { 13690b57cec5SDimitry Andric unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth(); 13700b57cec5SDimitry Andric APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth) : 13710b57cec5SDimitry Andric APInt::getMaxValue(BitWidth); 13720b57cec5SDimitry Andric auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; 13730b57cec5SDimitry Andric return SE.isAvailableAtLoopEntry(S, L) && 13740b57cec5SDimitry Andric SE.isLoopEntryGuardedByCond(L, Predicate, S, 13750b57cec5SDimitry Andric SE.getConstant(Max)); 13760b57cec5SDimitry Andric } 13775ffd83dbSDimitry Andric 13785ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 13795ffd83dbSDimitry Andric // rewriteLoopExitValues - Optimize IV users outside the loop. 13805ffd83dbSDimitry Andric // As a side effect, reduces the amount of IV processing within the loop. 13815ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 13825ffd83dbSDimitry Andric 13835ffd83dbSDimitry Andric static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) { 13845ffd83dbSDimitry Andric SmallPtrSet<const Instruction *, 8> Visited; 13855ffd83dbSDimitry Andric SmallVector<const Instruction *, 8> WorkList; 13865ffd83dbSDimitry Andric Visited.insert(I); 13875ffd83dbSDimitry Andric WorkList.push_back(I); 13885ffd83dbSDimitry Andric while (!WorkList.empty()) { 13895ffd83dbSDimitry Andric const Instruction *Curr = WorkList.pop_back_val(); 13905ffd83dbSDimitry Andric // This use is outside the loop, nothing to do. 13915ffd83dbSDimitry Andric if (!L->contains(Curr)) 13925ffd83dbSDimitry Andric continue; 13935ffd83dbSDimitry Andric // Do we assume it is a "hard" use which will not be eliminated easily? 13945ffd83dbSDimitry Andric if (Curr->mayHaveSideEffects()) 13955ffd83dbSDimitry Andric return true; 13965ffd83dbSDimitry Andric // Otherwise, add all its users to worklist. 1397bdd1243dSDimitry Andric for (const auto *U : Curr->users()) { 13985ffd83dbSDimitry Andric auto *UI = cast<Instruction>(U); 13995ffd83dbSDimitry Andric if (Visited.insert(UI).second) 14005ffd83dbSDimitry Andric WorkList.push_back(UI); 14015ffd83dbSDimitry Andric } 14025ffd83dbSDimitry Andric } 14035ffd83dbSDimitry Andric return false; 14045ffd83dbSDimitry Andric } 14055ffd83dbSDimitry Andric 14065ffd83dbSDimitry Andric // Collect information about PHI nodes which can be transformed in 14075ffd83dbSDimitry Andric // rewriteLoopExitValues. 14085ffd83dbSDimitry Andric struct RewritePhi { 14095ffd83dbSDimitry Andric PHINode *PN; // For which PHI node is this replacement? 14105ffd83dbSDimitry Andric unsigned Ith; // For which incoming value? 14115ffd83dbSDimitry Andric const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting. 14125ffd83dbSDimitry Andric Instruction *ExpansionPoint; // Where we'd like to expand that SCEV? 14135ffd83dbSDimitry Andric bool HighCost; // Is this expansion a high-cost? 14145ffd83dbSDimitry Andric 14155ffd83dbSDimitry Andric RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt, 14165ffd83dbSDimitry Andric bool H) 14175ffd83dbSDimitry Andric : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt), 14185ffd83dbSDimitry Andric HighCost(H) {} 14195ffd83dbSDimitry Andric }; 14205ffd83dbSDimitry Andric 14215ffd83dbSDimitry Andric // Check whether it is possible to delete the loop after rewriting exit 14225ffd83dbSDimitry Andric // value. If it is possible, ignore ReplaceExitValue and do rewriting 14235ffd83dbSDimitry Andric // aggressively. 14245ffd83dbSDimitry Andric static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) { 14255ffd83dbSDimitry Andric BasicBlock *Preheader = L->getLoopPreheader(); 14265ffd83dbSDimitry Andric // If there is no preheader, the loop will not be deleted. 14275ffd83dbSDimitry Andric if (!Preheader) 14285ffd83dbSDimitry Andric return false; 14295ffd83dbSDimitry Andric 14305ffd83dbSDimitry Andric // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1. 14315ffd83dbSDimitry Andric // We obviate multiple ExitingBlocks case for simplicity. 14325ffd83dbSDimitry Andric // TODO: If we see testcase with multiple ExitingBlocks can be deleted 14335ffd83dbSDimitry Andric // after exit value rewriting, we can enhance the logic here. 14345ffd83dbSDimitry Andric SmallVector<BasicBlock *, 4> ExitingBlocks; 14355ffd83dbSDimitry Andric L->getExitingBlocks(ExitingBlocks); 14365ffd83dbSDimitry Andric SmallVector<BasicBlock *, 8> ExitBlocks; 14375ffd83dbSDimitry Andric L->getUniqueExitBlocks(ExitBlocks); 14385ffd83dbSDimitry Andric if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1) 14395ffd83dbSDimitry Andric return false; 14405ffd83dbSDimitry Andric 14415ffd83dbSDimitry Andric BasicBlock *ExitBlock = ExitBlocks[0]; 14425ffd83dbSDimitry Andric BasicBlock::iterator BI = ExitBlock->begin(); 14435ffd83dbSDimitry Andric while (PHINode *P = dyn_cast<PHINode>(BI)) { 14445ffd83dbSDimitry Andric Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]); 14455ffd83dbSDimitry Andric 14465ffd83dbSDimitry Andric // If the Incoming value of P is found in RewritePhiSet, we know it 14475ffd83dbSDimitry Andric // could be rewritten to use a loop invariant value in transformation 14485ffd83dbSDimitry Andric // phase later. Skip it in the loop invariant check below. 14495ffd83dbSDimitry Andric bool found = false; 14505ffd83dbSDimitry Andric for (const RewritePhi &Phi : RewritePhiSet) { 14515ffd83dbSDimitry Andric unsigned i = Phi.Ith; 14525ffd83dbSDimitry Andric if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) { 14535ffd83dbSDimitry Andric found = true; 14545ffd83dbSDimitry Andric break; 14555ffd83dbSDimitry Andric } 14565ffd83dbSDimitry Andric } 14575ffd83dbSDimitry Andric 14585ffd83dbSDimitry Andric Instruction *I; 14595ffd83dbSDimitry Andric if (!found && (I = dyn_cast<Instruction>(Incoming))) 14605ffd83dbSDimitry Andric if (!L->hasLoopInvariantOperands(I)) 14615ffd83dbSDimitry Andric return false; 14625ffd83dbSDimitry Andric 14635ffd83dbSDimitry Andric ++BI; 14645ffd83dbSDimitry Andric } 14655ffd83dbSDimitry Andric 14665ffd83dbSDimitry Andric for (auto *BB : L->blocks()) 14675ffd83dbSDimitry Andric if (llvm::any_of(*BB, [](Instruction &I) { 14685ffd83dbSDimitry Andric return I.mayHaveSideEffects(); 14695ffd83dbSDimitry Andric })) 14705ffd83dbSDimitry Andric return false; 14715ffd83dbSDimitry Andric 14725ffd83dbSDimitry Andric return true; 14735ffd83dbSDimitry Andric } 14745ffd83dbSDimitry Andric 1475753f127fSDimitry Andric /// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi, 1476753f127fSDimitry Andric /// and returns true if this Phi is an induction phi in the loop. When 1477753f127fSDimitry Andric /// isInductionPHI returns true, \p ID will be also be set by isInductionPHI. 1478753f127fSDimitry Andric static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE, 1479753f127fSDimitry Andric InductionDescriptor &ID) { 1480753f127fSDimitry Andric if (!Phi) 1481753f127fSDimitry Andric return false; 1482753f127fSDimitry Andric if (!L->getLoopPreheader()) 1483753f127fSDimitry Andric return false; 1484753f127fSDimitry Andric if (Phi->getParent() != L->getHeader()) 1485753f127fSDimitry Andric return false; 1486753f127fSDimitry Andric return InductionDescriptor::isInductionPHI(Phi, L, SE, ID); 1487753f127fSDimitry Andric } 1488753f127fSDimitry Andric 14895ffd83dbSDimitry Andric int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, 14905ffd83dbSDimitry Andric ScalarEvolution *SE, 14915ffd83dbSDimitry Andric const TargetTransformInfo *TTI, 14925ffd83dbSDimitry Andric SCEVExpander &Rewriter, DominatorTree *DT, 14935ffd83dbSDimitry Andric ReplaceExitVal ReplaceExitValue, 14945ffd83dbSDimitry Andric SmallVector<WeakTrackingVH, 16> &DeadInsts) { 14955ffd83dbSDimitry Andric // Check a pre-condition. 14965ffd83dbSDimitry Andric assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 14975ffd83dbSDimitry Andric "Indvars did not preserve LCSSA!"); 14985ffd83dbSDimitry Andric 14995ffd83dbSDimitry Andric SmallVector<BasicBlock*, 8> ExitBlocks; 15005ffd83dbSDimitry Andric L->getUniqueExitBlocks(ExitBlocks); 15015ffd83dbSDimitry Andric 15025ffd83dbSDimitry Andric SmallVector<RewritePhi, 8> RewritePhiSet; 15035ffd83dbSDimitry Andric // Find all values that are computed inside the loop, but used outside of it. 15045ffd83dbSDimitry Andric // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan 15055ffd83dbSDimitry Andric // the exit blocks of the loop to find them. 15065ffd83dbSDimitry Andric for (BasicBlock *ExitBB : ExitBlocks) { 15075ffd83dbSDimitry Andric // If there are no PHI nodes in this exit block, then no values defined 15085ffd83dbSDimitry Andric // inside the loop are used on this path, skip it. 15095ffd83dbSDimitry Andric PHINode *PN = dyn_cast<PHINode>(ExitBB->begin()); 15105ffd83dbSDimitry Andric if (!PN) continue; 15115ffd83dbSDimitry Andric 15125ffd83dbSDimitry Andric unsigned NumPreds = PN->getNumIncomingValues(); 15135ffd83dbSDimitry Andric 15145ffd83dbSDimitry Andric // Iterate over all of the PHI nodes. 15155ffd83dbSDimitry Andric BasicBlock::iterator BBI = ExitBB->begin(); 15165ffd83dbSDimitry Andric while ((PN = dyn_cast<PHINode>(BBI++))) { 15175ffd83dbSDimitry Andric if (PN->use_empty()) 15185ffd83dbSDimitry Andric continue; // dead use, don't replace it 15195ffd83dbSDimitry Andric 15205ffd83dbSDimitry Andric if (!SE->isSCEVable(PN->getType())) 15215ffd83dbSDimitry Andric continue; 15225ffd83dbSDimitry Andric 15235ffd83dbSDimitry Andric // Iterate over all of the values in all the PHI nodes. 15245ffd83dbSDimitry Andric for (unsigned i = 0; i != NumPreds; ++i) { 15255ffd83dbSDimitry Andric // If the value being merged in is not integer or is not defined 15265ffd83dbSDimitry Andric // in the loop, skip it. 15275ffd83dbSDimitry Andric Value *InVal = PN->getIncomingValue(i); 15285ffd83dbSDimitry Andric if (!isa<Instruction>(InVal)) 15295ffd83dbSDimitry Andric continue; 15305ffd83dbSDimitry Andric 15315ffd83dbSDimitry Andric // If this pred is for a subloop, not L itself, skip it. 15325ffd83dbSDimitry Andric if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) 15335ffd83dbSDimitry Andric continue; // The Block is in a subloop, skip it. 15345ffd83dbSDimitry Andric 15355ffd83dbSDimitry Andric // Check that InVal is defined in the loop. 15365ffd83dbSDimitry Andric Instruction *Inst = cast<Instruction>(InVal); 15375ffd83dbSDimitry Andric if (!L->contains(Inst)) 15385ffd83dbSDimitry Andric continue; 15395ffd83dbSDimitry Andric 1540753f127fSDimitry Andric // Find exit values which are induction variables in the loop, and are 1541753f127fSDimitry Andric // unused in the loop, with the only use being the exit block PhiNode, 1542753f127fSDimitry Andric // and the induction variable update binary operator. 1543753f127fSDimitry Andric // The exit value can be replaced with the final value when it is cheap 1544753f127fSDimitry Andric // to do so. 1545753f127fSDimitry Andric if (ReplaceExitValue == UnusedIndVarInLoop) { 1546753f127fSDimitry Andric InductionDescriptor ID; 1547753f127fSDimitry Andric PHINode *IndPhi = dyn_cast<PHINode>(Inst); 1548753f127fSDimitry Andric if (IndPhi) { 1549753f127fSDimitry Andric if (!checkIsIndPhi(IndPhi, L, SE, ID)) 1550753f127fSDimitry Andric continue; 1551753f127fSDimitry Andric // This is an induction PHI. Check that the only users are PHI 1552753f127fSDimitry Andric // nodes, and induction variable update binary operators. 1553753f127fSDimitry Andric if (llvm::any_of(Inst->users(), [&](User *U) { 1554753f127fSDimitry Andric if (!isa<PHINode>(U) && !isa<BinaryOperator>(U)) 1555753f127fSDimitry Andric return true; 1556753f127fSDimitry Andric BinaryOperator *B = dyn_cast<BinaryOperator>(U); 1557753f127fSDimitry Andric if (B && B != ID.getInductionBinOp()) 1558753f127fSDimitry Andric return true; 1559753f127fSDimitry Andric return false; 1560753f127fSDimitry Andric })) 1561753f127fSDimitry Andric continue; 1562753f127fSDimitry Andric } else { 1563753f127fSDimitry Andric // If it is not an induction phi, it must be an induction update 1564753f127fSDimitry Andric // binary operator with an induction phi user. 1565753f127fSDimitry Andric BinaryOperator *B = dyn_cast<BinaryOperator>(Inst); 1566753f127fSDimitry Andric if (!B) 1567753f127fSDimitry Andric continue; 1568753f127fSDimitry Andric if (llvm::any_of(Inst->users(), [&](User *U) { 1569753f127fSDimitry Andric PHINode *Phi = dyn_cast<PHINode>(U); 1570753f127fSDimitry Andric if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID)) 1571753f127fSDimitry Andric return true; 1572753f127fSDimitry Andric return false; 1573753f127fSDimitry Andric })) 1574753f127fSDimitry Andric continue; 1575753f127fSDimitry Andric if (B != ID.getInductionBinOp()) 1576753f127fSDimitry Andric continue; 1577753f127fSDimitry Andric } 1578753f127fSDimitry Andric } 1579753f127fSDimitry Andric 15805ffd83dbSDimitry Andric // Okay, this instruction has a user outside of the current loop 15815ffd83dbSDimitry Andric // and varies predictably *inside* the loop. Evaluate the value it 15825ffd83dbSDimitry Andric // contains when the loop exits, if possible. We prefer to start with 15835ffd83dbSDimitry Andric // expressions which are true for all exits (so as to maximize 15845ffd83dbSDimitry Andric // expression reuse by the SCEVExpander), but resort to per-exit 15855ffd83dbSDimitry Andric // evaluation if that fails. 15865ffd83dbSDimitry Andric const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); 15875ffd83dbSDimitry Andric if (isa<SCEVCouldNotCompute>(ExitValue) || 15885ffd83dbSDimitry Andric !SE->isLoopInvariant(ExitValue, L) || 1589fcaf7f86SDimitry Andric !Rewriter.isSafeToExpand(ExitValue)) { 15905ffd83dbSDimitry Andric // TODO: This should probably be sunk into SCEV in some way; maybe a 15915ffd83dbSDimitry Andric // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for 15925ffd83dbSDimitry Andric // most SCEV expressions and other recurrence types (e.g. shift 15935ffd83dbSDimitry Andric // recurrences). Is there existing code we can reuse? 15945ffd83dbSDimitry Andric const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i)); 15955ffd83dbSDimitry Andric if (isa<SCEVCouldNotCompute>(ExitCount)) 15965ffd83dbSDimitry Andric continue; 15975ffd83dbSDimitry Andric if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst))) 15985ffd83dbSDimitry Andric if (AddRec->getLoop() == L) 15995ffd83dbSDimitry Andric ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE); 16005ffd83dbSDimitry Andric if (isa<SCEVCouldNotCompute>(ExitValue) || 16015ffd83dbSDimitry Andric !SE->isLoopInvariant(ExitValue, L) || 1602fcaf7f86SDimitry Andric !Rewriter.isSafeToExpand(ExitValue)) 16035ffd83dbSDimitry Andric continue; 16045ffd83dbSDimitry Andric } 16055ffd83dbSDimitry Andric 16065ffd83dbSDimitry Andric // Computing the value outside of the loop brings no benefit if it is 16075ffd83dbSDimitry Andric // definitely used inside the loop in a way which can not be optimized 16085ffd83dbSDimitry Andric // away. Avoid doing so unless we know we have a value which computes 16095ffd83dbSDimitry Andric // the ExitValue already. TODO: This should be merged into SCEV 16105ffd83dbSDimitry Andric // expander to leverage its knowledge of existing expressions. 16115ffd83dbSDimitry Andric if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(ExitValue) && 16125ffd83dbSDimitry Andric !isa<SCEVUnknown>(ExitValue) && hasHardUserWithinLoop(L, Inst)) 16135ffd83dbSDimitry Andric continue; 16145ffd83dbSDimitry Andric 16155ffd83dbSDimitry Andric // Check if expansions of this SCEV would count as being high cost. 16165ffd83dbSDimitry Andric bool HighCost = Rewriter.isHighCostExpansion( 16175ffd83dbSDimitry Andric ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst); 16185ffd83dbSDimitry Andric 16195ffd83dbSDimitry Andric // Note that we must not perform expansions until after 16205ffd83dbSDimitry Andric // we query *all* the costs, because if we perform temporary expansion 16215ffd83dbSDimitry Andric // inbetween, one that we might not intend to keep, said expansion 16225f757f3fSDimitry Andric // *may* affect cost calculation of the next SCEV's we'll query, 16235ffd83dbSDimitry Andric // and next SCEV may errneously get smaller cost. 16245ffd83dbSDimitry Andric 16255ffd83dbSDimitry Andric // Collect all the candidate PHINodes to be rewritten. 1626a4a491e2SDimitry Andric Instruction *InsertPt = 1627a4a491e2SDimitry Andric (isa<PHINode>(Inst) || isa<LandingPadInst>(Inst)) ? 1628a4a491e2SDimitry Andric &*Inst->getParent()->getFirstInsertionPt() : Inst; 1629a4a491e2SDimitry Andric RewritePhiSet.emplace_back(PN, i, ExitValue, InsertPt, HighCost); 16305ffd83dbSDimitry Andric } 16315ffd83dbSDimitry Andric } 16325ffd83dbSDimitry Andric } 16335ffd83dbSDimitry Andric 1634349cc55cSDimitry Andric // TODO: evaluate whether it is beneficial to change how we calculate 1635349cc55cSDimitry Andric // high-cost: if we have SCEV 'A' which we know we will expand, should we 1636349cc55cSDimitry Andric // calculate the cost of other SCEV's after expanding SCEV 'A', thus 1637349cc55cSDimitry Andric // potentially giving cost bonus to those other SCEV's? 16385ffd83dbSDimitry Andric 16395ffd83dbSDimitry Andric bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet); 16405ffd83dbSDimitry Andric int NumReplaced = 0; 16415ffd83dbSDimitry Andric 16425ffd83dbSDimitry Andric // Transformation. 16435ffd83dbSDimitry Andric for (const RewritePhi &Phi : RewritePhiSet) { 16445ffd83dbSDimitry Andric PHINode *PN = Phi.PN; 16455ffd83dbSDimitry Andric 16465ffd83dbSDimitry Andric // Only do the rewrite when the ExitValue can be expanded cheaply. 16475ffd83dbSDimitry Andric // If LoopCanBeDel is true, rewrite exit value aggressively. 1648753f127fSDimitry Andric if ((ReplaceExitValue == OnlyCheapRepl || 1649753f127fSDimitry Andric ReplaceExitValue == UnusedIndVarInLoop) && 1650753f127fSDimitry Andric !LoopCanBeDel && Phi.HighCost) 16515ffd83dbSDimitry Andric continue; 1652349cc55cSDimitry Andric 1653349cc55cSDimitry Andric Value *ExitVal = Rewriter.expandCodeFor( 1654349cc55cSDimitry Andric Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint); 1655349cc55cSDimitry Andric 1656349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal 1657349cc55cSDimitry Andric << '\n' 1658349cc55cSDimitry Andric << " LoopVal = " << *(Phi.ExpansionPoint) << "\n"); 1659349cc55cSDimitry Andric 1660349cc55cSDimitry Andric #ifndef NDEBUG 1661349cc55cSDimitry Andric // If we reuse an instruction from a loop which is neither L nor one of 1662349cc55cSDimitry Andric // its containing loops, we end up breaking LCSSA form for this loop by 1663349cc55cSDimitry Andric // creating a new use of its instruction. 1664349cc55cSDimitry Andric if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal)) 1665349cc55cSDimitry Andric if (auto *EVL = LI->getLoopFor(ExitInsn->getParent())) 1666349cc55cSDimitry Andric if (EVL != L) 1667349cc55cSDimitry Andric assert(EVL->contains(L) && "LCSSA breach detected!"); 1668349cc55cSDimitry Andric #endif 16695ffd83dbSDimitry Andric 16705ffd83dbSDimitry Andric NumReplaced++; 16715ffd83dbSDimitry Andric Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith)); 16725ffd83dbSDimitry Andric PN->setIncomingValue(Phi.Ith, ExitVal); 1673349cc55cSDimitry Andric // It's necessary to tell ScalarEvolution about this explicitly so that 1674349cc55cSDimitry Andric // it can walk the def-use list and forget all SCEVs, as it may not be 1675349cc55cSDimitry Andric // watching the PHI itself. Once the new exit value is in place, there 1676349cc55cSDimitry Andric // may not be a def-use connection between the loop and every instruction 1677349cc55cSDimitry Andric // which got a SCEVAddRecExpr for that loop. 1678349cc55cSDimitry Andric SE->forgetValue(PN); 16795ffd83dbSDimitry Andric 16805ffd83dbSDimitry Andric // If this instruction is dead now, delete it. Don't do it now to avoid 16815ffd83dbSDimitry Andric // invalidating iterators. 16825ffd83dbSDimitry Andric if (isInstructionTriviallyDead(Inst, TLI)) 16835ffd83dbSDimitry Andric DeadInsts.push_back(Inst); 16845ffd83dbSDimitry Andric 16855ffd83dbSDimitry Andric // Replace PN with ExitVal if that is legal and does not break LCSSA. 16865ffd83dbSDimitry Andric if (PN->getNumIncomingValues() == 1 && 16875ffd83dbSDimitry Andric LI->replacementPreservesLCSSAForm(PN, ExitVal)) { 16885ffd83dbSDimitry Andric PN->replaceAllUsesWith(ExitVal); 16895ffd83dbSDimitry Andric PN->eraseFromParent(); 16905ffd83dbSDimitry Andric } 16915ffd83dbSDimitry Andric } 16925ffd83dbSDimitry Andric 16935ffd83dbSDimitry Andric // The insertion point instruction may have been deleted; clear it out 16945ffd83dbSDimitry Andric // so that the rewriter doesn't trip over it later. 16955ffd83dbSDimitry Andric Rewriter.clearInsertPoint(); 16965ffd83dbSDimitry Andric return NumReplaced; 16975ffd83dbSDimitry Andric } 16985ffd83dbSDimitry Andric 16995ffd83dbSDimitry Andric /// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for 17005ffd83dbSDimitry Andric /// \p OrigLoop. 17015ffd83dbSDimitry Andric void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop, 17025ffd83dbSDimitry Andric Loop *RemainderLoop, uint64_t UF) { 17035ffd83dbSDimitry Andric assert(UF > 0 && "Zero unrolled factor is not supported"); 17045ffd83dbSDimitry Andric assert(UnrolledLoop != RemainderLoop && 17055ffd83dbSDimitry Andric "Unrolled and Remainder loops are expected to distinct"); 17065ffd83dbSDimitry Andric 17075ffd83dbSDimitry Andric // Get number of iterations in the original scalar loop. 17085ffd83dbSDimitry Andric unsigned OrigLoopInvocationWeight = 0; 1709bdd1243dSDimitry Andric std::optional<unsigned> OrigAverageTripCount = 17105ffd83dbSDimitry Andric getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight); 17115ffd83dbSDimitry Andric if (!OrigAverageTripCount) 17125ffd83dbSDimitry Andric return; 17135ffd83dbSDimitry Andric 17145ffd83dbSDimitry Andric // Calculate number of iterations in unrolled loop. 17155ffd83dbSDimitry Andric unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF; 17165ffd83dbSDimitry Andric // Calculate number of iterations for remainder loop. 17175ffd83dbSDimitry Andric unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF; 17185ffd83dbSDimitry Andric 17195ffd83dbSDimitry Andric setLoopEstimatedTripCount(UnrolledLoop, UnrolledAverageTripCount, 17205ffd83dbSDimitry Andric OrigLoopInvocationWeight); 17215ffd83dbSDimitry Andric setLoopEstimatedTripCount(RemainderLoop, RemainderAverageTripCount, 17225ffd83dbSDimitry Andric OrigLoopInvocationWeight); 17235ffd83dbSDimitry Andric } 17245ffd83dbSDimitry Andric 17255ffd83dbSDimitry Andric /// Utility that implements appending of loops onto a worklist. 17265ffd83dbSDimitry Andric /// Loops are added in preorder (analogous for reverse postorder for trees), 17275ffd83dbSDimitry Andric /// and the worklist is processed LIFO. 17285ffd83dbSDimitry Andric template <typename RangeT> 17295ffd83dbSDimitry Andric void llvm::appendReversedLoopsToWorklist( 17305ffd83dbSDimitry Andric RangeT &&Loops, SmallPriorityWorklist<Loop *, 4> &Worklist) { 17315ffd83dbSDimitry Andric // We use an internal worklist to build up the preorder traversal without 17325ffd83dbSDimitry Andric // recursion. 17335ffd83dbSDimitry Andric SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist; 17345ffd83dbSDimitry Andric 17355ffd83dbSDimitry Andric // We walk the initial sequence of loops in reverse because we generally want 17365ffd83dbSDimitry Andric // to visit defs before uses and the worklist is LIFO. 17375ffd83dbSDimitry Andric for (Loop *RootL : Loops) { 17385ffd83dbSDimitry Andric assert(PreOrderLoops.empty() && "Must start with an empty preorder walk."); 17395ffd83dbSDimitry Andric assert(PreOrderWorklist.empty() && 17405ffd83dbSDimitry Andric "Must start with an empty preorder walk worklist."); 17415ffd83dbSDimitry Andric PreOrderWorklist.push_back(RootL); 17425ffd83dbSDimitry Andric do { 17435ffd83dbSDimitry Andric Loop *L = PreOrderWorklist.pop_back_val(); 17445ffd83dbSDimitry Andric PreOrderWorklist.append(L->begin(), L->end()); 17455ffd83dbSDimitry Andric PreOrderLoops.push_back(L); 17465ffd83dbSDimitry Andric } while (!PreOrderWorklist.empty()); 17475ffd83dbSDimitry Andric 17485ffd83dbSDimitry Andric Worklist.insert(std::move(PreOrderLoops)); 17495ffd83dbSDimitry Andric PreOrderLoops.clear(); 17505ffd83dbSDimitry Andric } 17515ffd83dbSDimitry Andric } 17525ffd83dbSDimitry Andric 17535ffd83dbSDimitry Andric template <typename RangeT> 17545ffd83dbSDimitry Andric void llvm::appendLoopsToWorklist(RangeT &&Loops, 17555ffd83dbSDimitry Andric SmallPriorityWorklist<Loop *, 4> &Worklist) { 17565ffd83dbSDimitry Andric appendReversedLoopsToWorklist(reverse(Loops), Worklist); 17575ffd83dbSDimitry Andric } 17585ffd83dbSDimitry Andric 17595ffd83dbSDimitry Andric template void llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>( 17605ffd83dbSDimitry Andric ArrayRef<Loop *> &Loops, SmallPriorityWorklist<Loop *, 4> &Worklist); 17615ffd83dbSDimitry Andric 17625ffd83dbSDimitry Andric template void 17635ffd83dbSDimitry Andric llvm::appendLoopsToWorklist<Loop &>(Loop &L, 17645ffd83dbSDimitry Andric SmallPriorityWorklist<Loop *, 4> &Worklist); 17655ffd83dbSDimitry Andric 17665ffd83dbSDimitry Andric void llvm::appendLoopsToWorklist(LoopInfo &LI, 17675ffd83dbSDimitry Andric SmallPriorityWorklist<Loop *, 4> &Worklist) { 17685ffd83dbSDimitry Andric appendReversedLoopsToWorklist(LI, Worklist); 17695ffd83dbSDimitry Andric } 17705ffd83dbSDimitry Andric 17715ffd83dbSDimitry Andric Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, 17725ffd83dbSDimitry Andric LoopInfo *LI, LPPassManager *LPM) { 17735ffd83dbSDimitry Andric Loop &New = *LI->AllocateLoop(); 17745ffd83dbSDimitry Andric if (PL) 17755ffd83dbSDimitry Andric PL->addChildLoop(&New); 17765ffd83dbSDimitry Andric else 17775ffd83dbSDimitry Andric LI->addTopLevelLoop(&New); 17785ffd83dbSDimitry Andric 17795ffd83dbSDimitry Andric if (LPM) 17805ffd83dbSDimitry Andric LPM->addLoop(New); 17815ffd83dbSDimitry Andric 17825ffd83dbSDimitry Andric // Add all of the blocks in L to the new loop. 17835e801ac6SDimitry Andric for (BasicBlock *BB : L->blocks()) 17845e801ac6SDimitry Andric if (LI->getLoopFor(BB) == L) 17855e801ac6SDimitry Andric New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), *LI); 17865ffd83dbSDimitry Andric 17875ffd83dbSDimitry Andric // Add all of the subloops to the new loop. 17885ffd83dbSDimitry Andric for (Loop *I : *L) 17895ffd83dbSDimitry Andric cloneLoop(I, &New, VM, LI, LPM); 17905ffd83dbSDimitry Andric 17915ffd83dbSDimitry Andric return &New; 17925ffd83dbSDimitry Andric } 17935ffd83dbSDimitry Andric 17945ffd83dbSDimitry Andric /// IR Values for the lower and upper bounds of a pointer evolution. We 17955ffd83dbSDimitry Andric /// need to use value-handles because SCEV expansion can invalidate previously 17965ffd83dbSDimitry Andric /// expanded values. Thus expansion of a pointer can invalidate the bounds for 17975ffd83dbSDimitry Andric /// a previous one. 17985ffd83dbSDimitry Andric struct PointerBounds { 17995ffd83dbSDimitry Andric TrackingVH<Value> Start; 18005ffd83dbSDimitry Andric TrackingVH<Value> End; 18015f757f3fSDimitry Andric Value *StrideToCheck; 18025ffd83dbSDimitry Andric }; 18035ffd83dbSDimitry Andric 18045ffd83dbSDimitry Andric /// Expand code for the lower and upper bound of the pointer group \p CG 18055ffd83dbSDimitry Andric /// in \p TheLoop. \return the values for the bounds. 18065ffd83dbSDimitry Andric static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, 18075ffd83dbSDimitry Andric Loop *TheLoop, Instruction *Loc, 18085f757f3fSDimitry Andric SCEVExpander &Exp, bool HoistRuntimeChecks) { 18095ffd83dbSDimitry Andric LLVMContext &Ctx = Loc->getContext(); 18105f757f3fSDimitry Andric Type *PtrArithTy = PointerType::get(Ctx, CG->AddressSpace); 18115ffd83dbSDimitry Andric 18125ffd83dbSDimitry Andric Value *Start = nullptr, *End = nullptr; 18135ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); 18145f757f3fSDimitry Andric const SCEV *Low = CG->Low, *High = CG->High, *Stride = nullptr; 18155f757f3fSDimitry Andric 18165f757f3fSDimitry Andric // If the Low and High values are themselves loop-variant, then we may want 18175f757f3fSDimitry Andric // to expand the range to include those covered by the outer loop as well. 18185f757f3fSDimitry Andric // There is a trade-off here with the advantage being that creating checks 18195f757f3fSDimitry Andric // using the expanded range permits the runtime memory checks to be hoisted 18205f757f3fSDimitry Andric // out of the outer loop. This reduces the cost of entering the inner loop, 18215f757f3fSDimitry Andric // which can be significant for low trip counts. The disadvantage is that 18225f757f3fSDimitry Andric // there is a chance we may now never enter the vectorized inner loop, 18235f757f3fSDimitry Andric // whereas using a restricted range check could have allowed us to enter at 18245f757f3fSDimitry Andric // least once. This is why the behaviour is not currently the default and is 18255f757f3fSDimitry Andric // controlled by the parameter 'HoistRuntimeChecks'. 18265f757f3fSDimitry Andric if (HoistRuntimeChecks && TheLoop->getParentLoop() && 18275f757f3fSDimitry Andric isa<SCEVAddRecExpr>(High) && isa<SCEVAddRecExpr>(Low)) { 18285f757f3fSDimitry Andric auto *HighAR = cast<SCEVAddRecExpr>(High); 18295f757f3fSDimitry Andric auto *LowAR = cast<SCEVAddRecExpr>(Low); 18305f757f3fSDimitry Andric const Loop *OuterLoop = TheLoop->getParentLoop(); 18310fca6ea1SDimitry Andric ScalarEvolution &SE = *Exp.getSE(); 18320fca6ea1SDimitry Andric const SCEV *Recur = LowAR->getStepRecurrence(SE); 18330fca6ea1SDimitry Andric if (Recur == HighAR->getStepRecurrence(SE) && 18345f757f3fSDimitry Andric HighAR->getLoop() == OuterLoop && LowAR->getLoop() == OuterLoop) { 18355f757f3fSDimitry Andric BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); 18360fca6ea1SDimitry Andric const SCEV *OuterExitCount = SE.getExitCount(OuterLoop, OuterLoopLatch); 18375f757f3fSDimitry Andric if (!isa<SCEVCouldNotCompute>(OuterExitCount) && 18385f757f3fSDimitry Andric OuterExitCount->getType()->isIntegerTy()) { 18390fca6ea1SDimitry Andric const SCEV *NewHigh = 18400fca6ea1SDimitry Andric cast<SCEVAddRecExpr>(High)->evaluateAtIteration(OuterExitCount, SE); 18415f757f3fSDimitry Andric if (!isa<SCEVCouldNotCompute>(NewHigh)) { 18425f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Expanded RT check for range to include " 18435f757f3fSDimitry Andric "outer loop in order to permit hoisting\n"); 18445f757f3fSDimitry Andric High = NewHigh; 18455f757f3fSDimitry Andric Low = cast<SCEVAddRecExpr>(Low)->getStart(); 18465f757f3fSDimitry Andric // If there is a possibility that the stride is negative then we have 18475f757f3fSDimitry Andric // to generate extra checks to ensure the stride is positive. 18480fca6ea1SDimitry Andric if (!SE.isKnownNonNegative( 18490fca6ea1SDimitry Andric SE.applyLoopGuards(Recur, HighAR->getLoop()))) { 18505f757f3fSDimitry Andric Stride = Recur; 18515f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "LAA: ... but need to check stride is " 18525f757f3fSDimitry Andric "positive: " 18535f757f3fSDimitry Andric << *Stride << '\n'); 18545f757f3fSDimitry Andric } 18555f757f3fSDimitry Andric } 18565f757f3fSDimitry Andric } 18575f757f3fSDimitry Andric } 18585f757f3fSDimitry Andric } 18595f757f3fSDimitry Andric 18605f757f3fSDimitry Andric Start = Exp.expandCodeFor(Low, PtrArithTy, Loc); 18615f757f3fSDimitry Andric End = Exp.expandCodeFor(High, PtrArithTy, Loc); 186281ad6265SDimitry Andric if (CG->NeedsFreeze) { 186381ad6265SDimitry Andric IRBuilder<> Builder(Loc); 186481ad6265SDimitry Andric Start = Builder.CreateFreeze(Start, Start->getName() + ".fr"); 186581ad6265SDimitry Andric End = Builder.CreateFreeze(End, End->getName() + ".fr"); 186681ad6265SDimitry Andric } 18675f757f3fSDimitry Andric Value *StrideVal = 18685f757f3fSDimitry Andric Stride ? Exp.expandCodeFor(Stride, Stride->getType(), Loc) : nullptr; 18695f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Start: " << *Low << " End: " << *High << "\n"); 18705f757f3fSDimitry Andric return {Start, End, StrideVal}; 18715ffd83dbSDimitry Andric } 18725ffd83dbSDimitry Andric 18735ffd83dbSDimitry Andric /// Turns a collection of checks into a collection of expanded upper and 18745ffd83dbSDimitry Andric /// lower bounds for both pointers in the check. 18755ffd83dbSDimitry Andric static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> 18765ffd83dbSDimitry Andric expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L, 18775f757f3fSDimitry Andric Instruction *Loc, SCEVExpander &Exp, bool HoistRuntimeChecks) { 18785ffd83dbSDimitry Andric SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds; 18795ffd83dbSDimitry Andric 18805ffd83dbSDimitry Andric // Here we're relying on the SCEV Expander's cache to only emit code for the 18815ffd83dbSDimitry Andric // same bounds once. 18825ffd83dbSDimitry Andric transform(PointerChecks, std::back_inserter(ChecksWithBounds), 18835ffd83dbSDimitry Andric [&](const RuntimePointerCheck &Check) { 18845f757f3fSDimitry Andric PointerBounds First = expandBounds(Check.first, L, Loc, Exp, 18855f757f3fSDimitry Andric HoistRuntimeChecks), 18865f757f3fSDimitry Andric Second = expandBounds(Check.second, L, Loc, Exp, 18875f757f3fSDimitry Andric HoistRuntimeChecks); 18885ffd83dbSDimitry Andric return std::make_pair(First, Second); 18895ffd83dbSDimitry Andric }); 18905ffd83dbSDimitry Andric 18915ffd83dbSDimitry Andric return ChecksWithBounds; 18925ffd83dbSDimitry Andric } 18935ffd83dbSDimitry Andric 1894349cc55cSDimitry Andric Value *llvm::addRuntimeChecks( 18955ffd83dbSDimitry Andric Instruction *Loc, Loop *TheLoop, 18965ffd83dbSDimitry Andric const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, 18975f757f3fSDimitry Andric SCEVExpander &Exp, bool HoistRuntimeChecks) { 18985ffd83dbSDimitry Andric // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible. 18995ffd83dbSDimitry Andric // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible 19005f757f3fSDimitry Andric auto ExpandedChecks = 19015f757f3fSDimitry Andric expandBounds(PointerChecks, TheLoop, Loc, Exp, HoistRuntimeChecks); 19025ffd83dbSDimitry Andric 19035ffd83dbSDimitry Andric LLVMContext &Ctx = Loc->getContext(); 190404eeddc0SDimitry Andric IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx, 19050fca6ea1SDimitry Andric Loc->getDataLayout()); 190604eeddc0SDimitry Andric ChkBuilder.SetInsertPoint(Loc); 19075ffd83dbSDimitry Andric // Our instructions might fold to a constant. 19085ffd83dbSDimitry Andric Value *MemoryRuntimeCheck = nullptr; 19095ffd83dbSDimitry Andric 19100fca6ea1SDimitry Andric for (const auto &[A, B] : ExpandedChecks) { 19115ffd83dbSDimitry Andric // Check if two pointers (A and B) conflict where conflict is computed as: 19125ffd83dbSDimitry Andric // start(A) <= end(B) && start(B) <= end(A) 19135ffd83dbSDimitry Andric 19145f757f3fSDimitry Andric assert((A.Start->getType()->getPointerAddressSpace() == 19155f757f3fSDimitry Andric B.End->getType()->getPointerAddressSpace()) && 19165f757f3fSDimitry Andric (B.Start->getType()->getPointerAddressSpace() == 19175f757f3fSDimitry Andric A.End->getType()->getPointerAddressSpace()) && 19185ffd83dbSDimitry Andric "Trying to bounds check pointers with different address spaces"); 19195ffd83dbSDimitry Andric 19205ffd83dbSDimitry Andric // [A|B].Start points to the first accessed byte under base [A|B]. 19215ffd83dbSDimitry Andric // [A|B].End points to the last accessed byte, plus one. 19225ffd83dbSDimitry Andric // There is no conflict when the intervals are disjoint: 19235ffd83dbSDimitry Andric // NoConflict = (B.Start >= A.End) || (A.Start >= B.End) 19245ffd83dbSDimitry Andric // 19255ffd83dbSDimitry Andric // bound0 = (B.Start < A.End) 19265ffd83dbSDimitry Andric // bound1 = (A.Start < B.End) 19275ffd83dbSDimitry Andric // IsConflict = bound0 & bound1 19285f757f3fSDimitry Andric Value *Cmp0 = ChkBuilder.CreateICmpULT(A.Start, B.End, "bound0"); 19295f757f3fSDimitry Andric Value *Cmp1 = ChkBuilder.CreateICmpULT(B.Start, A.End, "bound1"); 19305ffd83dbSDimitry Andric Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); 19315f757f3fSDimitry Andric if (A.StrideToCheck) { 19325f757f3fSDimitry Andric Value *IsNegativeStride = ChkBuilder.CreateICmpSLT( 19335f757f3fSDimitry Andric A.StrideToCheck, ConstantInt::get(A.StrideToCheck->getType(), 0), 19345f757f3fSDimitry Andric "stride.check"); 19355f757f3fSDimitry Andric IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride); 19365f757f3fSDimitry Andric } 19375f757f3fSDimitry Andric if (B.StrideToCheck) { 19385f757f3fSDimitry Andric Value *IsNegativeStride = ChkBuilder.CreateICmpSLT( 19395f757f3fSDimitry Andric B.StrideToCheck, ConstantInt::get(B.StrideToCheck->getType(), 0), 19405f757f3fSDimitry Andric "stride.check"); 19415f757f3fSDimitry Andric IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride); 19425f757f3fSDimitry Andric } 19435ffd83dbSDimitry Andric if (MemoryRuntimeCheck) { 19445ffd83dbSDimitry Andric IsConflict = 19455ffd83dbSDimitry Andric ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx"); 19465ffd83dbSDimitry Andric } 19475ffd83dbSDimitry Andric MemoryRuntimeCheck = IsConflict; 19485ffd83dbSDimitry Andric } 19495ffd83dbSDimitry Andric 1950349cc55cSDimitry Andric return MemoryRuntimeCheck; 19515ffd83dbSDimitry Andric } 1952fe6060f1SDimitry Andric 195381ad6265SDimitry Andric Value *llvm::addDiffRuntimeChecks( 1954bdd1243dSDimitry Andric Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander, 195581ad6265SDimitry Andric function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) { 195681ad6265SDimitry Andric 195781ad6265SDimitry Andric LLVMContext &Ctx = Loc->getContext(); 195881ad6265SDimitry Andric IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx, 19590fca6ea1SDimitry Andric Loc->getDataLayout()); 196081ad6265SDimitry Andric ChkBuilder.SetInsertPoint(Loc); 196181ad6265SDimitry Andric // Our instructions might fold to a constant. 196281ad6265SDimitry Andric Value *MemoryRuntimeCheck = nullptr; 196381ad6265SDimitry Andric 19645f757f3fSDimitry Andric auto &SE = *Expander.getSE(); 19655f757f3fSDimitry Andric // Map to keep track of created compares, The key is the pair of operands for 19665f757f3fSDimitry Andric // the compare, to allow detecting and re-using redundant compares. 19675f757f3fSDimitry Andric DenseMap<std::pair<Value *, Value *>, Value *> SeenCompares; 19680fca6ea1SDimitry Andric for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) { 19690fca6ea1SDimitry Andric Type *Ty = SinkStart->getType(); 197081ad6265SDimitry Andric // Compute VF * IC * AccessSize. 197181ad6265SDimitry Andric auto *VFTimesUFTimesSize = 197281ad6265SDimitry Andric ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()), 19730fca6ea1SDimitry Andric ConstantInt::get(Ty, IC * AccessSize)); 19740fca6ea1SDimitry Andric Value *Diff = 19750fca6ea1SDimitry Andric Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc); 197681ad6265SDimitry Andric 19775f757f3fSDimitry Andric // Check if the same compare has already been created earlier. In that case, 19785f757f3fSDimitry Andric // there is no need to check it again. 19795f757f3fSDimitry Andric Value *IsConflict = SeenCompares.lookup({Diff, VFTimesUFTimesSize}); 19805f757f3fSDimitry Andric if (IsConflict) 19815f757f3fSDimitry Andric continue; 19825f757f3fSDimitry Andric 19835f757f3fSDimitry Andric IsConflict = 19845f757f3fSDimitry Andric ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check"); 19855f757f3fSDimitry Andric SeenCompares.insert({{Diff, VFTimesUFTimesSize}, IsConflict}); 19860fca6ea1SDimitry Andric if (NeedsFreeze) 19875f757f3fSDimitry Andric IsConflict = 19885f757f3fSDimitry Andric ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr"); 198981ad6265SDimitry Andric if (MemoryRuntimeCheck) { 199081ad6265SDimitry Andric IsConflict = 199181ad6265SDimitry Andric ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx"); 199281ad6265SDimitry Andric } 199381ad6265SDimitry Andric MemoryRuntimeCheck = IsConflict; 199481ad6265SDimitry Andric } 199581ad6265SDimitry Andric 199681ad6265SDimitry Andric return MemoryRuntimeCheck; 199781ad6265SDimitry Andric } 199881ad6265SDimitry Andric 1999bdd1243dSDimitry Andric std::optional<IVConditionInfo> 2000bdd1243dSDimitry Andric llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, 2001bdd1243dSDimitry Andric const MemorySSA &MSSA, AAResults &AA) { 2002fe6060f1SDimitry Andric auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator()); 2003fe6060f1SDimitry Andric if (!TI || !TI->isConditional()) 2004fe6060f1SDimitry Andric return {}; 2005fe6060f1SDimitry Andric 20060fca6ea1SDimitry Andric auto *CondI = dyn_cast<Instruction>(TI->getCondition()); 2007fe6060f1SDimitry Andric // The case with the condition outside the loop should already be handled 2008fe6060f1SDimitry Andric // earlier. 20090fca6ea1SDimitry Andric // Allow CmpInst and TruncInsts as they may be users of load instructions 20100fca6ea1SDimitry Andric // and have potential for partial unswitching 20110fca6ea1SDimitry Andric if (!CondI || !isa<CmpInst, TruncInst>(CondI) || !L.contains(CondI)) 2012fe6060f1SDimitry Andric return {}; 2013fe6060f1SDimitry Andric 2014fe6060f1SDimitry Andric SmallVector<Instruction *> InstToDuplicate; 2015fe6060f1SDimitry Andric InstToDuplicate.push_back(CondI); 2016fe6060f1SDimitry Andric 2017fe6060f1SDimitry Andric SmallVector<Value *, 4> WorkList; 2018fe6060f1SDimitry Andric WorkList.append(CondI->op_begin(), CondI->op_end()); 2019fe6060f1SDimitry Andric 2020fe6060f1SDimitry Andric SmallVector<MemoryAccess *, 4> AccessesToCheck; 2021fe6060f1SDimitry Andric SmallVector<MemoryLocation, 4> AccessedLocs; 2022fe6060f1SDimitry Andric while (!WorkList.empty()) { 2023fe6060f1SDimitry Andric Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val()); 2024fe6060f1SDimitry Andric if (!I || !L.contains(I)) 2025fe6060f1SDimitry Andric continue; 2026fe6060f1SDimitry Andric 2027fe6060f1SDimitry Andric // TODO: support additional instructions. 2028fe6060f1SDimitry Andric if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I)) 2029fe6060f1SDimitry Andric return {}; 2030fe6060f1SDimitry Andric 2031fe6060f1SDimitry Andric // Do not duplicate volatile and atomic loads. 2032fe6060f1SDimitry Andric if (auto *LI = dyn_cast<LoadInst>(I)) 2033fe6060f1SDimitry Andric if (LI->isVolatile() || LI->isAtomic()) 2034fe6060f1SDimitry Andric return {}; 2035fe6060f1SDimitry Andric 2036fe6060f1SDimitry Andric InstToDuplicate.push_back(I); 2037fe6060f1SDimitry Andric if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) { 2038fe6060f1SDimitry Andric if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) { 2039fe6060f1SDimitry Andric // Queue the defining access to check for alias checks. 2040fe6060f1SDimitry Andric AccessesToCheck.push_back(MemUse->getDefiningAccess()); 2041fe6060f1SDimitry Andric AccessedLocs.push_back(MemoryLocation::get(I)); 2042fe6060f1SDimitry Andric } else { 2043fe6060f1SDimitry Andric // MemoryDefs may clobber the location or may be atomic memory 2044fe6060f1SDimitry Andric // operations. Bail out. 2045fe6060f1SDimitry Andric return {}; 2046fe6060f1SDimitry Andric } 2047fe6060f1SDimitry Andric } 2048fe6060f1SDimitry Andric WorkList.append(I->op_begin(), I->op_end()); 2049fe6060f1SDimitry Andric } 2050fe6060f1SDimitry Andric 2051fe6060f1SDimitry Andric if (InstToDuplicate.empty()) 2052fe6060f1SDimitry Andric return {}; 2053fe6060f1SDimitry Andric 2054fe6060f1SDimitry Andric SmallVector<BasicBlock *, 4> ExitingBlocks; 2055fe6060f1SDimitry Andric L.getExitingBlocks(ExitingBlocks); 2056fe6060f1SDimitry Andric auto HasNoClobbersOnPath = 2057fe6060f1SDimitry Andric [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate, 2058fe6060f1SDimitry Andric MSSAThreshold](BasicBlock *Succ, BasicBlock *Header, 2059fe6060f1SDimitry Andric SmallVector<MemoryAccess *, 4> AccessesToCheck) 2060bdd1243dSDimitry Andric -> std::optional<IVConditionInfo> { 2061fe6060f1SDimitry Andric IVConditionInfo Info; 2062fe6060f1SDimitry Andric // First, collect all blocks in the loop that are on a patch from Succ 2063fe6060f1SDimitry Andric // to the header. 2064fe6060f1SDimitry Andric SmallVector<BasicBlock *, 4> WorkList; 2065fe6060f1SDimitry Andric WorkList.push_back(Succ); 2066fe6060f1SDimitry Andric WorkList.push_back(Header); 2067fe6060f1SDimitry Andric SmallPtrSet<BasicBlock *, 4> Seen; 2068fe6060f1SDimitry Andric Seen.insert(Header); 2069fe6060f1SDimitry Andric Info.PathIsNoop &= 2070fe6060f1SDimitry Andric all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); }); 2071fe6060f1SDimitry Andric 2072fe6060f1SDimitry Andric while (!WorkList.empty()) { 2073fe6060f1SDimitry Andric BasicBlock *Current = WorkList.pop_back_val(); 2074fe6060f1SDimitry Andric if (!L.contains(Current)) 2075fe6060f1SDimitry Andric continue; 2076fe6060f1SDimitry Andric const auto &SeenIns = Seen.insert(Current); 2077fe6060f1SDimitry Andric if (!SeenIns.second) 2078fe6060f1SDimitry Andric continue; 2079fe6060f1SDimitry Andric 2080fe6060f1SDimitry Andric Info.PathIsNoop &= all_of( 2081fe6060f1SDimitry Andric *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); }); 2082fe6060f1SDimitry Andric WorkList.append(succ_begin(Current), succ_end(Current)); 2083fe6060f1SDimitry Andric } 2084fe6060f1SDimitry Andric 2085fe6060f1SDimitry Andric // Require at least 2 blocks on a path through the loop. This skips 2086fe6060f1SDimitry Andric // paths that directly exit the loop. 2087fe6060f1SDimitry Andric if (Seen.size() < 2) 2088fe6060f1SDimitry Andric return {}; 2089fe6060f1SDimitry Andric 2090fe6060f1SDimitry Andric // Next, check if there are any MemoryDefs that are on the path through 2091fe6060f1SDimitry Andric // the loop (in the Seen set) and they may-alias any of the locations in 2092fe6060f1SDimitry Andric // AccessedLocs. If that is the case, they may modify the condition and 2093fe6060f1SDimitry Andric // partial unswitching is not possible. 2094fe6060f1SDimitry Andric SmallPtrSet<MemoryAccess *, 4> SeenAccesses; 2095fe6060f1SDimitry Andric while (!AccessesToCheck.empty()) { 2096fe6060f1SDimitry Andric MemoryAccess *Current = AccessesToCheck.pop_back_val(); 2097fe6060f1SDimitry Andric auto SeenI = SeenAccesses.insert(Current); 2098fe6060f1SDimitry Andric if (!SeenI.second || !Seen.contains(Current->getBlock())) 2099fe6060f1SDimitry Andric continue; 2100fe6060f1SDimitry Andric 2101fe6060f1SDimitry Andric // Bail out if exceeded the threshold. 2102fe6060f1SDimitry Andric if (SeenAccesses.size() >= MSSAThreshold) 2103fe6060f1SDimitry Andric return {}; 2104fe6060f1SDimitry Andric 2105fe6060f1SDimitry Andric // MemoryUse are read-only accesses. 2106fe6060f1SDimitry Andric if (isa<MemoryUse>(Current)) 2107fe6060f1SDimitry Andric continue; 2108fe6060f1SDimitry Andric 2109fe6060f1SDimitry Andric // For a MemoryDef, check if is aliases any of the location feeding 2110fe6060f1SDimitry Andric // the original condition. 2111fe6060f1SDimitry Andric if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) { 2112fe6060f1SDimitry Andric if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) { 2113fe6060f1SDimitry Andric return isModSet( 2114fe6060f1SDimitry Andric AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc)); 2115fe6060f1SDimitry Andric })) 2116fe6060f1SDimitry Andric return {}; 2117fe6060f1SDimitry Andric } 2118fe6060f1SDimitry Andric 2119fe6060f1SDimitry Andric for (Use &U : Current->uses()) 2120fe6060f1SDimitry Andric AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser())); 2121fe6060f1SDimitry Andric } 2122fe6060f1SDimitry Andric 2123fe6060f1SDimitry Andric // We could also allow loops with known trip counts without mustprogress, 2124fe6060f1SDimitry Andric // but ScalarEvolution may not be available. 2125fe6060f1SDimitry Andric Info.PathIsNoop &= isMustProgress(&L); 2126fe6060f1SDimitry Andric 2127fe6060f1SDimitry Andric // If the path is considered a no-op so far, check if it reaches a 2128fe6060f1SDimitry Andric // single exit block without any phis. This ensures no values from the 2129fe6060f1SDimitry Andric // loop are used outside of the loop. 2130fe6060f1SDimitry Andric if (Info.PathIsNoop) { 2131fe6060f1SDimitry Andric for (auto *Exiting : ExitingBlocks) { 2132fe6060f1SDimitry Andric if (!Seen.contains(Exiting)) 2133fe6060f1SDimitry Andric continue; 2134fe6060f1SDimitry Andric for (auto *Succ : successors(Exiting)) { 2135fe6060f1SDimitry Andric if (L.contains(Succ)) 2136fe6060f1SDimitry Andric continue; 2137fe6060f1SDimitry Andric 2138bdd1243dSDimitry Andric Info.PathIsNoop &= Succ->phis().empty() && 2139fe6060f1SDimitry Andric (!Info.ExitForPath || Info.ExitForPath == Succ); 2140fe6060f1SDimitry Andric if (!Info.PathIsNoop) 2141fe6060f1SDimitry Andric break; 2142fe6060f1SDimitry Andric assert((!Info.ExitForPath || Info.ExitForPath == Succ) && 2143fe6060f1SDimitry Andric "cannot have multiple exit blocks"); 2144fe6060f1SDimitry Andric Info.ExitForPath = Succ; 2145fe6060f1SDimitry Andric } 2146fe6060f1SDimitry Andric } 2147fe6060f1SDimitry Andric } 2148fe6060f1SDimitry Andric if (!Info.ExitForPath) 2149fe6060f1SDimitry Andric Info.PathIsNoop = false; 2150fe6060f1SDimitry Andric 2151fe6060f1SDimitry Andric Info.InstToDuplicate = InstToDuplicate; 2152fe6060f1SDimitry Andric return Info; 2153fe6060f1SDimitry Andric }; 2154fe6060f1SDimitry Andric 2155fe6060f1SDimitry Andric // If we branch to the same successor, partial unswitching will not be 2156fe6060f1SDimitry Andric // beneficial. 2157fe6060f1SDimitry Andric if (TI->getSuccessor(0) == TI->getSuccessor(1)) 2158fe6060f1SDimitry Andric return {}; 2159fe6060f1SDimitry Andric 2160fe6060f1SDimitry Andric if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(), 2161fe6060f1SDimitry Andric AccessesToCheck)) { 2162fe6060f1SDimitry Andric Info->KnownValue = ConstantInt::getTrue(TI->getContext()); 2163fe6060f1SDimitry Andric return Info; 2164fe6060f1SDimitry Andric } 2165fe6060f1SDimitry Andric if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(), 2166fe6060f1SDimitry Andric AccessesToCheck)) { 2167fe6060f1SDimitry Andric Info->KnownValue = ConstantInt::getFalse(TI->getContext()); 2168fe6060f1SDimitry Andric return Info; 2169fe6060f1SDimitry Andric } 2170fe6060f1SDimitry Andric 2171fe6060f1SDimitry Andric return {}; 2172fe6060f1SDimitry Andric } 2173