1 //===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements some loop unrolling utilities. It does not define any 10 // actual pass or policy, but provides a single function to perform loop 11 // unrolling. 12 // 13 // The process of unrolling can produce extraneous basic blocks linked with 14 // unconditional branches. This will be corrected in the future. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/ScopedHashTable.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/ADT/Statistic.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/Twine.h" 27 #include "llvm/Analysis/AliasAnalysis.h" 28 #include "llvm/Analysis/AssumptionCache.h" 29 #include "llvm/Analysis/DomTreeUpdater.h" 30 #include "llvm/Analysis/InstructionSimplify.h" 31 #include "llvm/Analysis/LoopInfo.h" 32 #include "llvm/Analysis/LoopIterator.h" 33 #include "llvm/Analysis/MemorySSA.h" 34 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 35 #include "llvm/Analysis/ScalarEvolution.h" 36 #include "llvm/IR/BasicBlock.h" 37 #include "llvm/IR/CFG.h" 38 #include "llvm/IR/Constants.h" 39 #include "llvm/IR/DebugInfoMetadata.h" 40 #include "llvm/IR/DebugLoc.h" 41 #include "llvm/IR/DiagnosticInfo.h" 42 #include "llvm/IR/Dominators.h" 43 #include "llvm/IR/Function.h" 44 #include "llvm/IR/Instruction.h" 45 #include "llvm/IR/Instructions.h" 46 #include "llvm/IR/IntrinsicInst.h" 47 #include "llvm/IR/Metadata.h" 48 #include "llvm/IR/PatternMatch.h" 49 #include "llvm/IR/Use.h" 50 #include "llvm/IR/User.h" 51 #include "llvm/IR/ValueHandle.h" 52 #include "llvm/IR/ValueMap.h" 53 #include "llvm/Support/Casting.h" 54 #include "llvm/Support/CommandLine.h" 55 #include "llvm/Support/Debug.h" 56 #include "llvm/Support/GenericDomTree.h" 57 #include "llvm/Support/raw_ostream.h" 58 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 59 #include "llvm/Transforms/Utils/Cloning.h" 60 #include "llvm/Transforms/Utils/Local.h" 61 #include "llvm/Transforms/Utils/LoopSimplify.h" 62 #include "llvm/Transforms/Utils/LoopUtils.h" 63 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 64 #include "llvm/Transforms/Utils/SimplifyIndVar.h" 65 #include "llvm/Transforms/Utils/UnrollLoop.h" 66 #include "llvm/Transforms/Utils/ValueMapper.h" 67 #include <algorithm> 68 #include <assert.h> 69 #include <numeric> 70 #include <type_traits> 71 #include <vector> 72 73 namespace llvm { 74 class DataLayout; 75 class Value; 76 } // namespace llvm 77 78 using namespace llvm; 79 80 #define DEBUG_TYPE "loop-unroll" 81 82 // TODO: Should these be here or in LoopUnroll? 83 STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); 84 STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); 85 STATISTIC(NumUnrolledNotLatch, "Number of loops unrolled without a conditional " 86 "latch (completely or otherwise)"); 87 88 static cl::opt<bool> 89 UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden, 90 cl::desc("Allow runtime unrolled loops to be unrolled " 91 "with epilog instead of prolog.")); 92 93 static cl::opt<bool> 94 UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden, 95 cl::desc("Verify domtree after unrolling"), 96 #ifdef EXPENSIVE_CHECKS 97 cl::init(true) 98 #else 99 cl::init(false) 100 #endif 101 ); 102 103 static cl::opt<bool> 104 UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden, 105 cl::desc("Verify loopinfo after unrolling"), 106 #ifdef EXPENSIVE_CHECKS 107 cl::init(true) 108 #else 109 cl::init(false) 110 #endif 111 ); 112 113 114 /// Check if unrolling created a situation where we need to insert phi nodes to 115 /// preserve LCSSA form. 116 /// \param Blocks is a vector of basic blocks representing unrolled loop. 117 /// \param L is the outer loop. 118 /// It's possible that some of the blocks are in L, and some are not. In this 119 /// case, if there is a use is outside L, and definition is inside L, we need to 120 /// insert a phi-node, otherwise LCSSA will be broken. 121 /// The function is just a helper function for llvm::UnrollLoop that returns 122 /// true if this situation occurs, indicating that LCSSA needs to be fixed. 123 static bool needToInsertPhisForLCSSA(Loop *L, 124 const std::vector<BasicBlock *> &Blocks, 125 LoopInfo *LI) { 126 for (BasicBlock *BB : Blocks) { 127 if (LI->getLoopFor(BB) == L) 128 continue; 129 for (Instruction &I : *BB) { 130 for (Use &U : I.operands()) { 131 if (const auto *Def = dyn_cast<Instruction>(U)) { 132 Loop *DefLoop = LI->getLoopFor(Def->getParent()); 133 if (!DefLoop) 134 continue; 135 if (DefLoop->contains(L)) 136 return true; 137 } 138 } 139 } 140 } 141 return false; 142 } 143 144 /// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary 145 /// and adds a mapping from the original loop to the new loop to NewLoops. 146 /// Returns nullptr if no new loop was created and a pointer to the 147 /// original loop OriginalBB was part of otherwise. 148 const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB, 149 BasicBlock *ClonedBB, LoopInfo *LI, 150 NewLoopsMap &NewLoops) { 151 // Figure out which loop New is in. 152 const Loop *OldLoop = LI->getLoopFor(OriginalBB); 153 assert(OldLoop && "Should (at least) be in the loop being unrolled!"); 154 155 Loop *&NewLoop = NewLoops[OldLoop]; 156 if (!NewLoop) { 157 // Found a new sub-loop. 158 assert(OriginalBB == OldLoop->getHeader() && 159 "Header should be first in RPO"); 160 161 NewLoop = LI->AllocateLoop(); 162 Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop()); 163 164 if (NewLoopParent) 165 NewLoopParent->addChildLoop(NewLoop); 166 else 167 LI->addTopLevelLoop(NewLoop); 168 169 NewLoop->addBasicBlockToLoop(ClonedBB, *LI); 170 return OldLoop; 171 } else { 172 NewLoop->addBasicBlockToLoop(ClonedBB, *LI); 173 return nullptr; 174 } 175 } 176 177 /// The function chooses which type of unroll (epilog or prolog) is more 178 /// profitabale. 179 /// Epilog unroll is more profitable when there is PHI that starts from 180 /// constant. In this case epilog will leave PHI start from constant, 181 /// but prolog will convert it to non-constant. 182 /// 183 /// loop: 184 /// PN = PHI [I, Latch], [CI, PreHeader] 185 /// I = foo(PN) 186 /// ... 187 /// 188 /// Epilog unroll case. 189 /// loop: 190 /// PN = PHI [I2, Latch], [CI, PreHeader] 191 /// I1 = foo(PN) 192 /// I2 = foo(I1) 193 /// ... 194 /// Prolog unroll case. 195 /// NewPN = PHI [PrologI, Prolog], [CI, PreHeader] 196 /// loop: 197 /// PN = PHI [I2, Latch], [NewPN, PreHeader] 198 /// I1 = foo(PN) 199 /// I2 = foo(I1) 200 /// ... 201 /// 202 static bool isEpilogProfitable(Loop *L) { 203 BasicBlock *PreHeader = L->getLoopPreheader(); 204 BasicBlock *Header = L->getHeader(); 205 assert(PreHeader && Header); 206 for (const PHINode &PN : Header->phis()) { 207 if (isa<ConstantInt>(PN.getIncomingValueForBlock(PreHeader))) 208 return true; 209 } 210 return false; 211 } 212 213 struct LoadValue { 214 Instruction *DefI = nullptr; 215 unsigned Generation = 0; 216 LoadValue() = default; 217 LoadValue(Instruction *Inst, unsigned Generation) 218 : DefI(Inst), Generation(Generation) {} 219 }; 220 221 class StackNode { 222 ScopedHashTable<const SCEV *, LoadValue>::ScopeTy LoadScope; 223 unsigned CurrentGeneration; 224 unsigned ChildGeneration; 225 DomTreeNode *Node; 226 DomTreeNode::const_iterator ChildIter; 227 DomTreeNode::const_iterator EndIter; 228 bool Processed = false; 229 230 public: 231 StackNode(ScopedHashTable<const SCEV *, LoadValue> &AvailableLoads, 232 unsigned cg, DomTreeNode *N, DomTreeNode::const_iterator Child, 233 DomTreeNode::const_iterator End) 234 : LoadScope(AvailableLoads), CurrentGeneration(cg), ChildGeneration(cg), 235 Node(N), ChildIter(Child), EndIter(End) {} 236 // Accessors. 237 unsigned currentGeneration() const { return CurrentGeneration; } 238 unsigned childGeneration() const { return ChildGeneration; } 239 void childGeneration(unsigned generation) { ChildGeneration = generation; } 240 DomTreeNode *node() { return Node; } 241 DomTreeNode::const_iterator childIter() const { return ChildIter; } 242 243 DomTreeNode *nextChild() { 244 DomTreeNode *Child = *ChildIter; 245 ++ChildIter; 246 return Child; 247 } 248 249 DomTreeNode::const_iterator end() const { return EndIter; } 250 bool isProcessed() const { return Processed; } 251 void process() { Processed = true; } 252 }; 253 254 Value *getMatchingValue(LoadValue LV, LoadInst *LI, unsigned CurrentGeneration, 255 BatchAAResults &BAA, 256 function_ref<MemorySSA *()> GetMSSA) { 257 if (!LV.DefI) 258 return nullptr; 259 if (LV.DefI->getType() != LI->getType()) 260 return nullptr; 261 if (LV.Generation != CurrentGeneration) { 262 MemorySSA *MSSA = GetMSSA(); 263 if (!MSSA) 264 return nullptr; 265 auto *EarlierMA = MSSA->getMemoryAccess(LV.DefI); 266 MemoryAccess *LaterDef = 267 MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA); 268 if (!MSSA->dominates(LaterDef, EarlierMA)) 269 return nullptr; 270 } 271 return LV.DefI; 272 } 273 274 void loadCSE(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, 275 BatchAAResults &BAA, function_ref<MemorySSA *()> GetMSSA) { 276 ScopedHashTable<const SCEV *, LoadValue> AvailableLoads; 277 SmallVector<std::unique_ptr<StackNode>> NodesToProcess; 278 DomTreeNode *HeaderD = DT.getNode(L->getHeader()); 279 NodesToProcess.emplace_back(new StackNode(AvailableLoads, 0, HeaderD, 280 HeaderD->begin(), HeaderD->end())); 281 282 unsigned CurrentGeneration = 0; 283 while (!NodesToProcess.empty()) { 284 StackNode *NodeToProcess = &*NodesToProcess.back(); 285 286 CurrentGeneration = NodeToProcess->currentGeneration(); 287 288 if (!NodeToProcess->isProcessed()) { 289 // Process the node. 290 291 // If this block has a single predecessor, then the predecessor is the 292 // parent 293 // of the domtree node and all of the live out memory values are still 294 // current in this block. If this block has multiple predecessors, then 295 // they could have invalidated the live-out memory values of our parent 296 // value. For now, just be conservative and invalidate memory if this 297 // block has multiple predecessors. 298 if (!NodeToProcess->node()->getBlock()->getSinglePredecessor()) 299 ++CurrentGeneration; 300 for (auto &I : make_early_inc_range(*NodeToProcess->node()->getBlock())) { 301 302 auto *Load = dyn_cast<LoadInst>(&I); 303 if (!Load || !Load->isSimple()) { 304 if (I.mayWriteToMemory()) 305 CurrentGeneration++; 306 continue; 307 } 308 309 const SCEV *PtrSCEV = SE.getSCEV(Load->getPointerOperand()); 310 LoadValue LV = AvailableLoads.lookup(PtrSCEV); 311 if (Value *M = 312 getMatchingValue(LV, Load, CurrentGeneration, BAA, GetMSSA)) { 313 if (LI.replacementPreservesLCSSAForm(Load, M)) { 314 Load->replaceAllUsesWith(M); 315 Load->eraseFromParent(); 316 } 317 } else { 318 AvailableLoads.insert(PtrSCEV, LoadValue(Load, CurrentGeneration)); 319 } 320 } 321 NodeToProcess->childGeneration(CurrentGeneration); 322 NodeToProcess->process(); 323 } else if (NodeToProcess->childIter() != NodeToProcess->end()) { 324 // Push the next child onto the stack. 325 DomTreeNode *Child = NodeToProcess->nextChild(); 326 if (!L->contains(Child->getBlock())) 327 continue; 328 NodesToProcess.emplace_back( 329 new StackNode(AvailableLoads, NodeToProcess->childGeneration(), Child, 330 Child->begin(), Child->end())); 331 } else { 332 // It has been processed, and there are no more children to process, 333 // so delete it and pop it off the stack. 334 NodesToProcess.pop_back(); 335 } 336 } 337 } 338 339 /// Perform some cleanup and simplifications on loops after unrolling. It is 340 /// useful to simplify the IV's in the new loop, as well as do a quick 341 /// simplify/dce pass of the instructions. 342 void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, 343 ScalarEvolution *SE, DominatorTree *DT, 344 AssumptionCache *AC, 345 const TargetTransformInfo *TTI, 346 AAResults *AA) { 347 using namespace llvm::PatternMatch; 348 349 // Simplify any new induction variables in the partially unrolled loop. 350 if (SE && SimplifyIVs) { 351 SmallVector<WeakTrackingVH, 16> DeadInsts; 352 simplifyLoopIVs(L, SE, DT, LI, TTI, DeadInsts); 353 354 // Aggressively clean up dead instructions that simplifyLoopIVs already 355 // identified. Any remaining should be cleaned up below. 356 while (!DeadInsts.empty()) { 357 Value *V = DeadInsts.pop_back_val(); 358 if (Instruction *Inst = dyn_cast_or_null<Instruction>(V)) 359 RecursivelyDeleteTriviallyDeadInstructions(Inst); 360 } 361 362 if (AA) { 363 std::unique_ptr<MemorySSA> MSSA = nullptr; 364 BatchAAResults BAA(*AA); 365 loadCSE(L, *DT, *SE, *LI, BAA, [L, AA, DT, &MSSA]() -> MemorySSA * { 366 if (!MSSA) 367 MSSA.reset(new MemorySSA(*L, AA, DT)); 368 return &*MSSA; 369 }); 370 } 371 } 372 373 // At this point, the code is well formed. Perform constprop, instsimplify, 374 // and dce. 375 const DataLayout &DL = L->getHeader()->getDataLayout(); 376 SmallVector<WeakTrackingVH, 16> DeadInsts; 377 for (BasicBlock *BB : L->getBlocks()) { 378 // Remove repeated debug instructions after loop unrolling. 379 if (BB->getParent()->getSubprogram()) 380 RemoveRedundantDbgInstrs(BB); 381 382 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) { 383 if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC})) 384 if (LI->replacementPreservesLCSSAForm(&Inst, V)) 385 Inst.replaceAllUsesWith(V); 386 if (isInstructionTriviallyDead(&Inst)) 387 DeadInsts.emplace_back(&Inst); 388 389 // Fold ((add X, C1), C2) to (add X, C1+C2). This is very common in 390 // unrolled loops, and handling this early allows following code to 391 // identify the IV as a "simple recurrence" without first folding away 392 // a long chain of adds. 393 { 394 Value *X; 395 const APInt *C1, *C2; 396 if (match(&Inst, m_Add(m_Add(m_Value(X), m_APInt(C1)), m_APInt(C2)))) { 397 auto *InnerI = dyn_cast<Instruction>(Inst.getOperand(0)); 398 auto *InnerOBO = cast<OverflowingBinaryOperator>(Inst.getOperand(0)); 399 bool SignedOverflow; 400 APInt NewC = C1->sadd_ov(*C2, SignedOverflow); 401 Inst.setOperand(0, X); 402 Inst.setOperand(1, ConstantInt::get(Inst.getType(), NewC)); 403 Inst.setHasNoUnsignedWrap(Inst.hasNoUnsignedWrap() && 404 InnerOBO->hasNoUnsignedWrap()); 405 Inst.setHasNoSignedWrap(Inst.hasNoSignedWrap() && 406 InnerOBO->hasNoSignedWrap() && 407 !SignedOverflow); 408 if (InnerI && isInstructionTriviallyDead(InnerI)) 409 DeadInsts.emplace_back(InnerI); 410 } 411 } 412 } 413 // We can't do recursive deletion until we're done iterating, as we might 414 // have a phi which (potentially indirectly) uses instructions later in 415 // the block we're iterating through. 416 RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); 417 } 418 } 419 420 // Loops containing convergent instructions that are uncontrolled or controlled 421 // from outside the loop must have a count that divides their TripMultiple. 422 LLVM_ATTRIBUTE_USED 423 static bool canHaveUnrollRemainder(const Loop *L) { 424 if (getLoopConvergenceHeart(L)) 425 return false; 426 427 // Check for uncontrolled convergent operations. 428 for (auto &BB : L->blocks()) { 429 for (auto &I : *BB) { 430 if (isa<ConvergenceControlInst>(I)) 431 return true; 432 if (auto *CB = dyn_cast<CallBase>(&I)) 433 if (CB->isConvergent()) 434 return CB->getConvergenceControlToken(); 435 } 436 } 437 return true; 438 } 439 440 /// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling 441 /// can only fail when the loop's latch block is not terminated by a conditional 442 /// branch instruction. However, if the trip count (and multiple) are not known, 443 /// loop unrolling will mostly produce more code that is no faster. 444 /// 445 /// If Runtime is true then UnrollLoop will try to insert a prologue or 446 /// epilogue that ensures the latch has a trip multiple of Count. UnrollLoop 447 /// will not runtime-unroll the loop if computing the run-time trip count will 448 /// be expensive and AllowExpensiveTripCount is false. 449 /// 450 /// The LoopInfo Analysis that is passed will be kept consistent. 451 /// 452 /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and 453 /// DominatorTree if they are non-null. 454 /// 455 /// If RemainderLoop is non-null, it will receive the remainder loop (if 456 /// required and not fully unrolled). 457 LoopUnrollResult 458 llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, 459 ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, 460 const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, 461 bool PreserveLCSSA, Loop **RemainderLoop, AAResults *AA) { 462 assert(DT && "DomTree is required"); 463 464 if (!L->getLoopPreheader()) { 465 LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); 466 return LoopUnrollResult::Unmodified; 467 } 468 469 if (!L->getLoopLatch()) { 470 LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); 471 return LoopUnrollResult::Unmodified; 472 } 473 474 // Loops with indirectbr cannot be cloned. 475 if (!L->isSafeToClone()) { 476 LLVM_DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); 477 return LoopUnrollResult::Unmodified; 478 } 479 480 if (L->getHeader()->hasAddressTaken()) { 481 // The loop-rotate pass can be helpful to avoid this in many cases. 482 LLVM_DEBUG( 483 dbgs() << " Won't unroll loop: address of header block is taken.\n"); 484 return LoopUnrollResult::Unmodified; 485 } 486 487 assert(ULO.Count > 0); 488 489 // All these values should be taken only after peeling because they might have 490 // changed. 491 BasicBlock *Preheader = L->getLoopPreheader(); 492 BasicBlock *Header = L->getHeader(); 493 BasicBlock *LatchBlock = L->getLoopLatch(); 494 SmallVector<BasicBlock *, 4> ExitBlocks; 495 L->getExitBlocks(ExitBlocks); 496 std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks(); 497 498 const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L); 499 const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L); 500 unsigned EstimatedLoopInvocationWeight = 0; 501 std::optional<unsigned> OriginalTripCount = 502 llvm::getLoopEstimatedTripCount(L, &EstimatedLoopInvocationWeight); 503 504 // Effectively "DCE" unrolled iterations that are beyond the max tripcount 505 // and will never be executed. 506 if (MaxTripCount && ULO.Count > MaxTripCount) 507 ULO.Count = MaxTripCount; 508 509 struct ExitInfo { 510 unsigned TripCount; 511 unsigned TripMultiple; 512 unsigned BreakoutTrip; 513 bool ExitOnTrue; 514 BasicBlock *FirstExitingBlock = nullptr; 515 SmallVector<BasicBlock *> ExitingBlocks; 516 }; 517 DenseMap<BasicBlock *, ExitInfo> ExitInfos; 518 SmallVector<BasicBlock *, 4> ExitingBlocks; 519 L->getExitingBlocks(ExitingBlocks); 520 for (auto *ExitingBlock : ExitingBlocks) { 521 // The folding code is not prepared to deal with non-branch instructions 522 // right now. 523 auto *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); 524 if (!BI) 525 continue; 526 527 ExitInfo &Info = ExitInfos[ExitingBlock]; 528 Info.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock); 529 Info.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); 530 if (Info.TripCount != 0) { 531 Info.BreakoutTrip = Info.TripCount % ULO.Count; 532 Info.TripMultiple = 0; 533 } else { 534 Info.BreakoutTrip = Info.TripMultiple = 535 (unsigned)std::gcd(ULO.Count, Info.TripMultiple); 536 } 537 Info.ExitOnTrue = !L->contains(BI->getSuccessor(0)); 538 Info.ExitingBlocks.push_back(ExitingBlock); 539 LLVM_DEBUG(dbgs() << " Exiting block %" << ExitingBlock->getName() 540 << ": TripCount=" << Info.TripCount 541 << ", TripMultiple=" << Info.TripMultiple 542 << ", BreakoutTrip=" << Info.BreakoutTrip << "\n"); 543 } 544 545 // Are we eliminating the loop control altogether? Note that we can know 546 // we're eliminating the backedge without knowing exactly which iteration 547 // of the unrolled body exits. 548 const bool CompletelyUnroll = ULO.Count == MaxTripCount; 549 550 const bool PreserveOnlyFirst = CompletelyUnroll && MaxOrZero; 551 552 // There's no point in performing runtime unrolling if this unroll count 553 // results in a full unroll. 554 if (CompletelyUnroll) 555 ULO.Runtime = false; 556 557 // Go through all exits of L and see if there are any phi-nodes there. We just 558 // conservatively assume that they're inserted to preserve LCSSA form, which 559 // means that complete unrolling might break this form. We need to either fix 560 // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For 561 // now we just recompute LCSSA for the outer loop, but it should be possible 562 // to fix it in-place. 563 bool NeedToFixLCSSA = 564 PreserveLCSSA && CompletelyUnroll && 565 any_of(ExitBlocks, 566 [](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); }); 567 568 // The current loop unroll pass can unroll loops that have 569 // (1) single latch; and 570 // (2a) latch is unconditional; or 571 // (2b) latch is conditional and is an exiting block 572 // FIXME: The implementation can be extended to work with more complicated 573 // cases, e.g. loops with multiple latches. 574 BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); 575 576 // A conditional branch which exits the loop, which can be optimized to an 577 // unconditional branch in the unrolled loop in some cases. 578 bool LatchIsExiting = L->isLoopExiting(LatchBlock); 579 if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) { 580 LLVM_DEBUG( 581 dbgs() << "Can't unroll; a conditional latch must exit the loop"); 582 return LoopUnrollResult::Unmodified; 583 } 584 585 assert((!ULO.Runtime || canHaveUnrollRemainder(L)) && 586 "Can't runtime unroll if loop contains a convergent operation."); 587 588 bool EpilogProfitability = 589 UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog 590 : isEpilogProfitable(L); 591 592 if (ULO.Runtime && 593 !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount, 594 EpilogProfitability, ULO.UnrollRemainder, 595 ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI, 596 PreserveLCSSA, ULO.SCEVExpansionBudget, 597 ULO.RuntimeUnrollMultiExit, RemainderLoop)) { 598 if (ULO.Force) 599 ULO.Runtime = false; 600 else { 601 LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be " 602 "generated when assuming runtime trip count\n"); 603 return LoopUnrollResult::Unmodified; 604 } 605 } 606 607 using namespace ore; 608 // Report the unrolling decision. 609 if (CompletelyUnroll) { 610 LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() 611 << " with trip count " << ULO.Count << "!\n"); 612 if (ORE) 613 ORE->emit([&]() { 614 return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), 615 L->getHeader()) 616 << "completely unrolled loop with " 617 << NV("UnrollCount", ULO.Count) << " iterations"; 618 }); 619 } else { 620 LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " 621 << ULO.Count); 622 if (ULO.Runtime) 623 LLVM_DEBUG(dbgs() << " with run-time trip count"); 624 LLVM_DEBUG(dbgs() << "!\n"); 625 626 if (ORE) 627 ORE->emit([&]() { 628 OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), 629 L->getHeader()); 630 Diag << "unrolled loop by a factor of " << NV("UnrollCount", ULO.Count); 631 if (ULO.Runtime) 632 Diag << " with run-time trip count"; 633 return Diag; 634 }); 635 } 636 637 // We are going to make changes to this loop. SCEV may be keeping cached info 638 // about it, in particular about backedge taken count. The changes we make 639 // are guaranteed to invalidate this information for our loop. It is tempting 640 // to only invalidate the loop being unrolled, but it is incorrect as long as 641 // all exiting branches from all inner loops have impact on the outer loops, 642 // and if something changes inside them then any of outer loops may also 643 // change. When we forget outermost loop, we also forget all contained loops 644 // and this is what we need here. 645 if (SE) { 646 if (ULO.ForgetAllSCEV) 647 SE->forgetAllLoops(); 648 else { 649 SE->forgetTopmostLoop(L); 650 SE->forgetBlockAndLoopDispositions(); 651 } 652 } 653 654 if (!LatchIsExiting) 655 ++NumUnrolledNotLatch; 656 657 // For the first iteration of the loop, we should use the precloned values for 658 // PHI nodes. Insert associations now. 659 ValueToValueMapTy LastValueMap; 660 std::vector<PHINode*> OrigPHINode; 661 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { 662 OrigPHINode.push_back(cast<PHINode>(I)); 663 } 664 665 std::vector<BasicBlock *> Headers; 666 std::vector<BasicBlock *> Latches; 667 Headers.push_back(Header); 668 Latches.push_back(LatchBlock); 669 670 // The current on-the-fly SSA update requires blocks to be processed in 671 // reverse postorder so that LastValueMap contains the correct value at each 672 // exit. 673 LoopBlocksDFS DFS(L); 674 DFS.perform(LI); 675 676 // Stash the DFS iterators before adding blocks to the loop. 677 LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); 678 LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); 679 680 std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks(); 681 682 // Loop Unrolling might create new loops. While we do preserve LoopInfo, we 683 // might break loop-simplified form for these loops (as they, e.g., would 684 // share the same exit blocks). We'll keep track of loops for which we can 685 // break this so that later we can re-simplify them. 686 SmallSetVector<Loop *, 4> LoopsToSimplify; 687 for (Loop *SubLoop : *L) 688 LoopsToSimplify.insert(SubLoop); 689 690 // When a FSDiscriminator is enabled, we don't need to add the multiply 691 // factors to the discriminators. 692 if (Header->getParent()->shouldEmitDebugInfoForProfiling() && 693 !EnableFSDiscriminator) 694 for (BasicBlock *BB : L->getBlocks()) 695 for (Instruction &I : *BB) 696 if (!I.isDebugOrPseudoInst()) 697 if (const DILocation *DIL = I.getDebugLoc()) { 698 auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count); 699 if (NewDIL) 700 I.setDebugLoc(*NewDIL); 701 else 702 LLVM_DEBUG(dbgs() 703 << "Failed to create new discriminator: " 704 << DIL->getFilename() << " Line: " << DIL->getLine()); 705 } 706 707 // Identify what noalias metadata is inside the loop: if it is inside the 708 // loop, the associated metadata must be cloned for each iteration. 709 SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes; 710 identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes); 711 712 // We place the unrolled iterations immediately after the original loop 713 // latch. This is a reasonable default placement if we don't have block 714 // frequencies, and if we do, well the layout will be adjusted later. 715 auto BlockInsertPt = std::next(LatchBlock->getIterator()); 716 for (unsigned It = 1; It != ULO.Count; ++It) { 717 SmallVector<BasicBlock *, 8> NewBlocks; 718 SmallDenseMap<const Loop *, Loop *, 4> NewLoops; 719 NewLoops[L] = L; 720 721 for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { 722 ValueToValueMapTy VMap; 723 BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); 724 Header->getParent()->insert(BlockInsertPt, New); 725 726 assert((*BB != Header || LI->getLoopFor(*BB) == L) && 727 "Header should not be in a sub-loop"); 728 // Tell LI about New. 729 const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops); 730 if (OldLoop) 731 LoopsToSimplify.insert(NewLoops[OldLoop]); 732 733 if (*BB == Header) { 734 // Loop over all of the PHI nodes in the block, changing them to use 735 // the incoming values from the previous block. 736 for (PHINode *OrigPHI : OrigPHINode) { 737 PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]); 738 Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); 739 if (Instruction *InValI = dyn_cast<Instruction>(InVal)) 740 if (It > 1 && L->contains(InValI)) 741 InVal = LastValueMap[InValI]; 742 VMap[OrigPHI] = InVal; 743 NewPHI->eraseFromParent(); 744 } 745 746 // Eliminate copies of the loop heart intrinsic, if any. 747 if (ULO.Heart) { 748 auto it = VMap.find(ULO.Heart); 749 assert(it != VMap.end()); 750 Instruction *heartCopy = cast<Instruction>(it->second); 751 heartCopy->eraseFromParent(); 752 VMap.erase(it); 753 } 754 } 755 756 // Update our running map of newest clones 757 LastValueMap[*BB] = New; 758 for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); 759 VI != VE; ++VI) 760 LastValueMap[VI->first] = VI->second; 761 762 // Add phi entries for newly created values to all exit blocks. 763 for (BasicBlock *Succ : successors(*BB)) { 764 if (L->contains(Succ)) 765 continue; 766 for (PHINode &PHI : Succ->phis()) { 767 Value *Incoming = PHI.getIncomingValueForBlock(*BB); 768 ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); 769 if (It != LastValueMap.end()) 770 Incoming = It->second; 771 PHI.addIncoming(Incoming, New); 772 SE->forgetLcssaPhiWithNewPredecessor(L, &PHI); 773 } 774 } 775 // Keep track of new headers and latches as we create them, so that 776 // we can insert the proper branches later. 777 if (*BB == Header) 778 Headers.push_back(New); 779 if (*BB == LatchBlock) 780 Latches.push_back(New); 781 782 // Keep track of the exiting block and its successor block contained in 783 // the loop for the current iteration. 784 auto ExitInfoIt = ExitInfos.find(*BB); 785 if (ExitInfoIt != ExitInfos.end()) 786 ExitInfoIt->second.ExitingBlocks.push_back(New); 787 788 NewBlocks.push_back(New); 789 UnrolledLoopBlocks.push_back(New); 790 791 // Update DomTree: since we just copy the loop body, and each copy has a 792 // dedicated entry block (copy of the header block), this header's copy 793 // dominates all copied blocks. That means, dominance relations in the 794 // copied body are the same as in the original body. 795 if (*BB == Header) 796 DT->addNewBlock(New, Latches[It - 1]); 797 else { 798 auto BBDomNode = DT->getNode(*BB); 799 auto BBIDom = BBDomNode->getIDom(); 800 BasicBlock *OriginalBBIDom = BBIDom->getBlock(); 801 DT->addNewBlock( 802 New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)])); 803 } 804 } 805 806 // Remap all instructions in the most recent iteration 807 remapInstructionsInBlocks(NewBlocks, LastValueMap); 808 for (BasicBlock *NewBlock : NewBlocks) 809 for (Instruction &I : *NewBlock) 810 if (auto *II = dyn_cast<AssumeInst>(&I)) 811 AC->registerAssumption(II); 812 813 { 814 // Identify what other metadata depends on the cloned version. After 815 // cloning, replace the metadata with the corrected version for both 816 // memory instructions and noalias intrinsics. 817 std::string ext = (Twine("It") + Twine(It)).str(); 818 cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks, 819 Header->getContext(), ext); 820 } 821 } 822 823 // Loop over the PHI nodes in the original block, setting incoming values. 824 for (PHINode *PN : OrigPHINode) { 825 if (CompletelyUnroll) { 826 PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); 827 PN->eraseFromParent(); 828 } else if (ULO.Count > 1) { 829 Value *InVal = PN->removeIncomingValue(LatchBlock, false); 830 // If this value was defined in the loop, take the value defined by the 831 // last iteration of the loop. 832 if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { 833 if (L->contains(InValI)) 834 InVal = LastValueMap[InVal]; 835 } 836 assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); 837 PN->addIncoming(InVal, Latches.back()); 838 } 839 } 840 841 // Connect latches of the unrolled iterations to the headers of the next 842 // iteration. Currently they point to the header of the same iteration. 843 for (unsigned i = 0, e = Latches.size(); i != e; ++i) { 844 unsigned j = (i + 1) % e; 845 Latches[i]->getTerminator()->replaceSuccessorWith(Headers[i], Headers[j]); 846 } 847 848 // Update dominators of blocks we might reach through exits. 849 // Immediate dominator of such block might change, because we add more 850 // routes which can lead to the exit: we can now reach it from the copied 851 // iterations too. 852 if (ULO.Count > 1) { 853 for (auto *BB : OriginalLoopBlocks) { 854 auto *BBDomNode = DT->getNode(BB); 855 SmallVector<BasicBlock *, 16> ChildrenToUpdate; 856 for (auto *ChildDomNode : BBDomNode->children()) { 857 auto *ChildBB = ChildDomNode->getBlock(); 858 if (!L->contains(ChildBB)) 859 ChildrenToUpdate.push_back(ChildBB); 860 } 861 // The new idom of the block will be the nearest common dominator 862 // of all copies of the previous idom. This is equivalent to the 863 // nearest common dominator of the previous idom and the first latch, 864 // which dominates all copies of the previous idom. 865 BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, LatchBlock); 866 for (auto *ChildBB : ChildrenToUpdate) 867 DT->changeImmediateDominator(ChildBB, NewIDom); 868 } 869 } 870 871 assert(!UnrollVerifyDomtree || 872 DT->verify(DominatorTree::VerificationLevel::Fast)); 873 874 SmallVector<DominatorTree::UpdateType> DTUpdates; 875 auto SetDest = [&](BasicBlock *Src, bool WillExit, bool ExitOnTrue) { 876 auto *Term = cast<BranchInst>(Src->getTerminator()); 877 const unsigned Idx = ExitOnTrue ^ WillExit; 878 BasicBlock *Dest = Term->getSuccessor(Idx); 879 BasicBlock *DeadSucc = Term->getSuccessor(1-Idx); 880 881 // Remove predecessors from all non-Dest successors. 882 DeadSucc->removePredecessor(Src, /* KeepOneInputPHIs */ true); 883 884 // Replace the conditional branch with an unconditional one. 885 auto *BI = BranchInst::Create(Dest, Term->getIterator()); 886 BI->setDebugLoc(Term->getDebugLoc()); 887 Term->eraseFromParent(); 888 889 DTUpdates.emplace_back(DominatorTree::Delete, Src, DeadSucc); 890 }; 891 892 auto WillExit = [&](const ExitInfo &Info, unsigned i, unsigned j, 893 bool IsLatch) -> std::optional<bool> { 894 if (CompletelyUnroll) { 895 if (PreserveOnlyFirst) { 896 if (i == 0) 897 return std::nullopt; 898 return j == 0; 899 } 900 // Complete (but possibly inexact) unrolling 901 if (j == 0) 902 return true; 903 if (Info.TripCount && j != Info.TripCount) 904 return false; 905 return std::nullopt; 906 } 907 908 if (ULO.Runtime) { 909 // If runtime unrolling inserts a prologue, information about non-latch 910 // exits may be stale. 911 if (IsLatch && j != 0) 912 return false; 913 return std::nullopt; 914 } 915 916 if (j != Info.BreakoutTrip && 917 (Info.TripMultiple == 0 || j % Info.TripMultiple != 0)) { 918 // If we know the trip count or a multiple of it, we can safely use an 919 // unconditional branch for some iterations. 920 return false; 921 } 922 return std::nullopt; 923 }; 924 925 // Fold branches for iterations where we know that they will exit or not 926 // exit. 927 for (auto &Pair : ExitInfos) { 928 ExitInfo &Info = Pair.second; 929 for (unsigned i = 0, e = Info.ExitingBlocks.size(); i != e; ++i) { 930 // The branch destination. 931 unsigned j = (i + 1) % e; 932 bool IsLatch = Pair.first == LatchBlock; 933 std::optional<bool> KnownWillExit = WillExit(Info, i, j, IsLatch); 934 if (!KnownWillExit) { 935 if (!Info.FirstExitingBlock) 936 Info.FirstExitingBlock = Info.ExitingBlocks[i]; 937 continue; 938 } 939 940 // We don't fold known-exiting branches for non-latch exits here, 941 // because this ensures that both all loop blocks and all exit blocks 942 // remain reachable in the CFG. 943 // TODO: We could fold these branches, but it would require much more 944 // sophisticated updates to LoopInfo. 945 if (*KnownWillExit && !IsLatch) { 946 if (!Info.FirstExitingBlock) 947 Info.FirstExitingBlock = Info.ExitingBlocks[i]; 948 continue; 949 } 950 951 SetDest(Info.ExitingBlocks[i], *KnownWillExit, Info.ExitOnTrue); 952 } 953 } 954 955 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); 956 DomTreeUpdater *DTUToUse = &DTU; 957 if (ExitingBlocks.size() == 1 && ExitInfos.size() == 1) { 958 // Manually update the DT if there's a single exiting node. In that case 959 // there's a single exit node and it is sufficient to update the nodes 960 // immediately dominated by the original exiting block. They will become 961 // dominated by the first exiting block that leaves the loop after 962 // unrolling. Note that the CFG inside the loop does not change, so there's 963 // no need to update the DT inside the unrolled loop. 964 DTUToUse = nullptr; 965 auto &[OriginalExit, Info] = *ExitInfos.begin(); 966 if (!Info.FirstExitingBlock) 967 Info.FirstExitingBlock = Info.ExitingBlocks.back(); 968 for (auto *C : to_vector(DT->getNode(OriginalExit)->children())) { 969 if (L->contains(C->getBlock())) 970 continue; 971 C->setIDom(DT->getNode(Info.FirstExitingBlock)); 972 } 973 } else { 974 DTU.applyUpdates(DTUpdates); 975 } 976 977 // When completely unrolling, the last latch becomes unreachable. 978 if (!LatchIsExiting && CompletelyUnroll) { 979 // There is no need to update the DT here, because there must be a unique 980 // latch. Hence if the latch is not exiting it must directly branch back to 981 // the original loop header and does not dominate any nodes. 982 assert(LatchBlock->getSingleSuccessor() && "Loop with multiple latches?"); 983 changeToUnreachable(Latches.back()->getTerminator(), PreserveLCSSA); 984 } 985 986 // Merge adjacent basic blocks, if possible. 987 for (BasicBlock *Latch : Latches) { 988 BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator()); 989 assert((Term || 990 (CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) && 991 "Need a branch as terminator, except when fully unrolling with " 992 "unconditional latch"); 993 if (Term && Term->isUnconditional()) { 994 BasicBlock *Dest = Term->getSuccessor(0); 995 BasicBlock *Fold = Dest->getUniquePredecessor(); 996 if (MergeBlockIntoPredecessor(Dest, /*DTU=*/DTUToUse, LI, 997 /*MSSAU=*/nullptr, /*MemDep=*/nullptr, 998 /*PredecessorWithTwoSuccessors=*/false, 999 DTUToUse ? nullptr : DT)) { 1000 // Dest has been folded into Fold. Update our worklists accordingly. 1001 std::replace(Latches.begin(), Latches.end(), Dest, Fold); 1002 llvm::erase(UnrolledLoopBlocks, Dest); 1003 } 1004 } 1005 } 1006 1007 if (DTUToUse) { 1008 // Apply updates to the DomTree. 1009 DT = &DTU.getDomTree(); 1010 } 1011 assert(!UnrollVerifyDomtree || 1012 DT->verify(DominatorTree::VerificationLevel::Fast)); 1013 1014 // At this point, the code is well formed. We now simplify the unrolled loop, 1015 // doing constant propagation and dead code elimination as we go. 1016 simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC, 1017 TTI, AA); 1018 1019 NumCompletelyUnrolled += CompletelyUnroll; 1020 ++NumUnrolled; 1021 1022 Loop *OuterL = L->getParentLoop(); 1023 // Update LoopInfo if the loop is completely removed. 1024 if (CompletelyUnroll) { 1025 LI->erase(L); 1026 // We shouldn't try to use `L` anymore. 1027 L = nullptr; 1028 } else if (OriginalTripCount) { 1029 // Update the trip count. Note that the remainder has already logic 1030 // computing it in `UnrollRuntimeLoopRemainder`. 1031 setLoopEstimatedTripCount(L, *OriginalTripCount / ULO.Count, 1032 EstimatedLoopInvocationWeight); 1033 } 1034 1035 // LoopInfo should not be valid, confirm that. 1036 if (UnrollVerifyLoopInfo) 1037 LI->verify(*DT); 1038 1039 // After complete unrolling most of the blocks should be contained in OuterL. 1040 // However, some of them might happen to be out of OuterL (e.g. if they 1041 // precede a loop exit). In this case we might need to insert PHI nodes in 1042 // order to preserve LCSSA form. 1043 // We don't need to check this if we already know that we need to fix LCSSA 1044 // form. 1045 // TODO: For now we just recompute LCSSA for the outer loop in this case, but 1046 // it should be possible to fix it in-place. 1047 if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA) 1048 NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI); 1049 1050 // Make sure that loop-simplify form is preserved. We want to simplify 1051 // at least one layer outside of the loop that was unrolled so that any 1052 // changes to the parent loop exposed by the unrolling are considered. 1053 if (OuterL) { 1054 // OuterL includes all loops for which we can break loop-simplify, so 1055 // it's sufficient to simplify only it (it'll recursively simplify inner 1056 // loops too). 1057 if (NeedToFixLCSSA) { 1058 // LCSSA must be performed on the outermost affected loop. The unrolled 1059 // loop's last loop latch is guaranteed to be in the outermost loop 1060 // after LoopInfo's been updated by LoopInfo::erase. 1061 Loop *LatchLoop = LI->getLoopFor(Latches.back()); 1062 Loop *FixLCSSALoop = OuterL; 1063 if (!FixLCSSALoop->contains(LatchLoop)) 1064 while (FixLCSSALoop->getParentLoop() != LatchLoop) 1065 FixLCSSALoop = FixLCSSALoop->getParentLoop(); 1066 1067 formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE); 1068 } else if (PreserveLCSSA) { 1069 assert(OuterL->isLCSSAForm(*DT) && 1070 "Loops should be in LCSSA form after loop-unroll."); 1071 } 1072 1073 // TODO: That potentially might be compile-time expensive. We should try 1074 // to fix the loop-simplified form incrementally. 1075 simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA); 1076 } else { 1077 // Simplify loops for which we might've broken loop-simplify form. 1078 for (Loop *SubLoop : LoopsToSimplify) 1079 simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA); 1080 } 1081 1082 return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled 1083 : LoopUnrollResult::PartiallyUnrolled; 1084 } 1085 1086 /// Given an llvm.loop loop id metadata node, returns the loop hint metadata 1087 /// node with the given name (for example, "llvm.loop.unroll.count"). If no 1088 /// such metadata node exists, then nullptr is returned. 1089 MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) { 1090 // First operand should refer to the loop id itself. 1091 assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 1092 assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 1093 1094 for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) { 1095 MDNode *MD = dyn_cast<MDNode>(MDO); 1096 if (!MD) 1097 continue; 1098 1099 MDString *S = dyn_cast<MDString>(MD->getOperand(0)); 1100 if (!S) 1101 continue; 1102 1103 if (Name == S->getString()) 1104 return MD; 1105 } 1106 return nullptr; 1107 } 1108