xref: /llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp (revision b85a402dd899fc0e702a60b459bc06317d532d84)
1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This is the LLVM vectorization plan. It represents a candidate for
11 /// vectorization, allowing to plan and optimize how to vectorize a given loop
12 /// before generating LLVM-IR.
13 /// The vectorizer uses vectorization plans to estimate the costs of potential
14 /// candidates and if profitable to execute the desired plan, generating vector
15 /// LLVM-IR code.
16 ///
17 //===----------------------------------------------------------------------===//
18 
19 #include "VPlan.h"
20 #include "VPlanCFG.h"
21 #include "VPlanDominatorTree.h"
22 #include "llvm/ADT/DepthFirstIterator.h"
23 #include "llvm/ADT/PostOrderIterator.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/Analysis/LoopInfo.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/CFG.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/GenericDomTreeConstruction.h"
39 #include "llvm/Support/GraphWriter.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
42 #include "llvm/Transforms/Utils/LoopVersioning.h"
43 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
44 #include <cassert>
45 #include <string>
46 #include <vector>
47 
48 using namespace llvm;
49 
50 namespace llvm {
51 extern cl::opt<bool> EnableVPlanNativePath;
52 }
53 
54 #define DEBUG_TYPE "vplan"
55 
56 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
57 raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
58   const VPInstruction *Instr = dyn_cast<VPInstruction>(&V);
59   VPSlotTracker SlotTracker(
60       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
61   V.print(OS, SlotTracker);
62   return OS;
63 }
64 #endif
65 
66 Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder,
67                                 const ElementCount &VF) const {
68   switch (LaneKind) {
69   case VPLane::Kind::ScalableLast:
70     // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
71     return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
72                              Builder.getInt32(VF.getKnownMinValue() - Lane));
73   case VPLane::Kind::First:
74     return Builder.getInt32(Lane);
75   }
76   llvm_unreachable("Unknown lane kind");
77 }
78 
79 VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
80     : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
81   if (Def)
82     Def->addDefinedValue(this);
83 }
84 
85 VPValue::~VPValue() {
86   assert(Users.empty() && "trying to delete a VPValue with remaining users");
87   if (Def)
88     Def->removeDefinedValue(this);
89 }
90 
91 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
92 void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
93   if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
94     R->print(OS, "", SlotTracker);
95   else
96     printAsOperand(OS, SlotTracker);
97 }
98 
99 void VPValue::dump() const {
100   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
101   VPSlotTracker SlotTracker(
102       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
103   print(dbgs(), SlotTracker);
104   dbgs() << "\n";
105 }
106 
107 void VPDef::dump() const {
108   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
109   VPSlotTracker SlotTracker(
110       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
111   print(dbgs(), "", SlotTracker);
112   dbgs() << "\n";
113 }
114 #endif
115 
116 VPRecipeBase *VPValue::getDefiningRecipe() {
117   return cast_or_null<VPRecipeBase>(Def);
118 }
119 
120 const VPRecipeBase *VPValue::getDefiningRecipe() const {
121   return cast_or_null<VPRecipeBase>(Def);
122 }
123 
124 // Get the top-most entry block of \p Start. This is the entry block of the
125 // containing VPlan. This function is templated to support both const and non-const blocks
126 template <typename T> static T *getPlanEntry(T *Start) {
127   T *Next = Start;
128   T *Current = Start;
129   while ((Next = Next->getParent()))
130     Current = Next;
131 
132   SmallSetVector<T *, 8> WorkList;
133   WorkList.insert(Current);
134 
135   for (unsigned i = 0; i < WorkList.size(); i++) {
136     T *Current = WorkList[i];
137     if (Current->getNumPredecessors() == 0)
138       return Current;
139     auto &Predecessors = Current->getPredecessors();
140     WorkList.insert(Predecessors.begin(), Predecessors.end());
141   }
142 
143   llvm_unreachable("VPlan without any entry node without predecessors");
144 }
145 
146 VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
147 
148 const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
149 
150 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
151 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
152   const VPBlockBase *Block = this;
153   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
154     Block = Region->getEntry();
155   return cast<VPBasicBlock>(Block);
156 }
157 
158 VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
159   VPBlockBase *Block = this;
160   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
161     Block = Region->getEntry();
162   return cast<VPBasicBlock>(Block);
163 }
164 
165 void VPBlockBase::setPlan(VPlan *ParentPlan) {
166   assert(
167       (ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) &&
168       "Can only set plan on its entry or preheader block.");
169   Plan = ParentPlan;
170 }
171 
172 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
173 const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const {
174   const VPBlockBase *Block = this;
175   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
176     Block = Region->getExiting();
177   return cast<VPBasicBlock>(Block);
178 }
179 
180 VPBasicBlock *VPBlockBase::getExitingBasicBlock() {
181   VPBlockBase *Block = this;
182   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
183     Block = Region->getExiting();
184   return cast<VPBasicBlock>(Block);
185 }
186 
187 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
188   if (!Successors.empty() || !Parent)
189     return this;
190   assert(Parent->getExiting() == this &&
191          "Block w/o successors not the exiting block of its parent.");
192   return Parent->getEnclosingBlockWithSuccessors();
193 }
194 
195 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
196   if (!Predecessors.empty() || !Parent)
197     return this;
198   assert(Parent->getEntry() == this &&
199          "Block w/o predecessors not the entry of its parent.");
200   return Parent->getEnclosingBlockWithPredecessors();
201 }
202 
203 void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
204   for (VPBlockBase *Block : to_vector(vp_depth_first_shallow(Entry)))
205     delete Block;
206 }
207 
208 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
209   iterator It = begin();
210   while (It != end() && It->isPhi())
211     It++;
212   return It;
213 }
214 
215 Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
216   if (Def->isLiveIn())
217     return Def->getLiveInIRValue();
218 
219   if (hasScalarValue(Def, Instance)) {
220     return Data
221         .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)];
222   }
223 
224   assert(hasVectorValue(Def, Instance.Part));
225   auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
226   if (!VecPart->getType()->isVectorTy()) {
227     assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
228     return VecPart;
229   }
230   // TODO: Cache created scalar values.
231   Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
232   auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
233   // set(Def, Extract, Instance);
234   return Extract;
235 }
236 BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
237   VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
238   return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
239 }
240 
241 void VPTransformState::addNewMetadata(Instruction *To,
242                                       const Instruction *Orig) {
243   // If the loop was versioned with memchecks, add the corresponding no-alias
244   // metadata.
245   if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig)))
246     LVer->annotateInstWithNoAlias(To, Orig);
247 }
248 
249 void VPTransformState::addMetadata(Instruction *To, Instruction *From) {
250   propagateMetadata(To, From);
251   addNewMetadata(To, From);
252 }
253 
254 void VPTransformState::addMetadata(ArrayRef<Value *> To, Instruction *From) {
255   for (Value *V : To) {
256     if (Instruction *I = dyn_cast<Instruction>(V))
257       addMetadata(I, From);
258   }
259 }
260 
261 void VPTransformState::setDebugLocFromInst(const Value *V) {
262   const Instruction *Inst = dyn_cast<Instruction>(V);
263   if (!Inst) {
264     Builder.SetCurrentDebugLocation(DebugLoc());
265     return;
266   }
267 
268   const DILocation *DIL = Inst->getDebugLoc();
269   // When a FSDiscriminator is enabled, we don't need to add the multiply
270   // factors to the discriminators.
271   if (DIL && Inst->getFunction()->shouldEmitDebugInfoForProfiling() &&
272       !isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) {
273     // FIXME: For scalable vectors, assume vscale=1.
274     auto NewDIL =
275         DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
276     if (NewDIL)
277       Builder.SetCurrentDebugLocation(*NewDIL);
278     else
279       LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
280                         << DIL->getFilename() << " Line: " << DIL->getLine());
281   } else
282     Builder.SetCurrentDebugLocation(DIL);
283 }
284 
285 BasicBlock *
286 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
287   // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
288   // Pred stands for Predessor. Prev stands for Previous - last visited/created.
289   BasicBlock *PrevBB = CFG.PrevBB;
290   BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
291                                          PrevBB->getParent(), CFG.ExitBB);
292   LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
293 
294   // Hook up the new basic block to its predecessors.
295   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
296     VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
297     auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
298     BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
299 
300     assert(PredBB && "Predecessor basic-block not found building successor.");
301     auto *PredBBTerminator = PredBB->getTerminator();
302     LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
303 
304     auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
305     if (isa<UnreachableInst>(PredBBTerminator)) {
306       assert(PredVPSuccessors.size() == 1 &&
307              "Predecessor ending w/o branch must have single successor.");
308       DebugLoc DL = PredBBTerminator->getDebugLoc();
309       PredBBTerminator->eraseFromParent();
310       auto *Br = BranchInst::Create(NewBB, PredBB);
311       Br->setDebugLoc(DL);
312     } else if (TermBr && !TermBr->isConditional()) {
313       TermBr->setSuccessor(0, NewBB);
314     } else {
315       // Set each forward successor here when it is created, excluding
316       // backedges. A backward successor is set when the branch is created.
317       unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
318       assert(!TermBr->getSuccessor(idx) &&
319              "Trying to reset an existing successor block.");
320       TermBr->setSuccessor(idx, NewBB);
321     }
322   }
323   return NewBB;
324 }
325 
326 void VPBasicBlock::execute(VPTransformState *State) {
327   bool Replica = State->Instance && !State->Instance->isFirstIteration();
328   VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
329   VPBlockBase *SingleHPred = nullptr;
330   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
331 
332   auto IsLoopRegion = [](VPBlockBase *BB) {
333     auto *R = dyn_cast<VPRegionBlock>(BB);
334     return R && !R->isReplicator();
335   };
336 
337   // 1. Create an IR basic block, or reuse the last one or ExitBB if possible.
338   if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) {
339     // ExitBB can be re-used for the exit block of the Plan.
340     NewBB = State->CFG.ExitBB;
341     State->CFG.PrevBB = NewBB;
342 
343     // Update the branch instruction in the predecessor to branch to ExitBB.
344     VPBlockBase *PredVPB = getSingleHierarchicalPredecessor();
345     VPBasicBlock *ExitingVPBB = PredVPB->getExitingBasicBlock();
346     assert(PredVPB->getSingleSuccessor() == this &&
347            "predecessor must have the current block as only successor");
348     BasicBlock *ExitingBB = State->CFG.VPBB2IRBB[ExitingVPBB];
349     // The Exit block of a loop is always set to be successor 0 of the Exiting
350     // block.
351     cast<BranchInst>(ExitingBB->getTerminator())->setSuccessor(0, NewBB);
352   } else if (PrevVPBB && /* A */
353              !((SingleHPred = getSingleHierarchicalPredecessor()) &&
354                SingleHPred->getExitingBasicBlock() == PrevVPBB &&
355                PrevVPBB->getSingleHierarchicalSuccessor() &&
356                (SingleHPred->getParent() == getEnclosingLoopRegion() &&
357                 !IsLoopRegion(SingleHPred))) &&         /* B */
358              !(Replica && getPredecessors().empty())) { /* C */
359     // The last IR basic block is reused, as an optimization, in three cases:
360     // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
361     // B. when the current VPBB has a single (hierarchical) predecessor which
362     //    is PrevVPBB and the latter has a single (hierarchical) successor which
363     //    both are in the same non-replicator region; and
364     // C. when the current VPBB is an entry of a region replica - where PrevVPBB
365     //    is the exiting VPBB of this region from a previous instance, or the
366     //    predecessor of this region.
367 
368     NewBB = createEmptyBasicBlock(State->CFG);
369     State->Builder.SetInsertPoint(NewBB);
370     // Temporarily terminate with unreachable until CFG is rewired.
371     UnreachableInst *Terminator = State->Builder.CreateUnreachable();
372     // Register NewBB in its loop. In innermost loops its the same for all
373     // BB's.
374     if (State->CurrentVectorLoop)
375       State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
376     State->Builder.SetInsertPoint(Terminator);
377     State->CFG.PrevBB = NewBB;
378   }
379 
380   // 2. Fill the IR basic block with IR instructions.
381   LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
382                     << " in BB:" << NewBB->getName() << '\n');
383 
384   State->CFG.VPBB2IRBB[this] = NewBB;
385   State->CFG.PrevVPBB = this;
386 
387   for (VPRecipeBase &Recipe : Recipes)
388     Recipe.execute(*State);
389 
390   LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
391 }
392 
393 void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
394   for (VPRecipeBase &R : Recipes) {
395     for (auto *Def : R.definedValues())
396       Def->replaceAllUsesWith(NewValue);
397 
398     for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
399       R.setOperand(I, NewValue);
400   }
401 }
402 
403 VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
404   assert((SplitAt == end() || SplitAt->getParent() == this) &&
405          "can only split at a position in the same block");
406 
407   SmallVector<VPBlockBase *, 2> Succs(successors());
408   // First, disconnect the current block from its successors.
409   for (VPBlockBase *Succ : Succs)
410     VPBlockUtils::disconnectBlocks(this, Succ);
411 
412   // Create new empty block after the block to split.
413   auto *SplitBlock = new VPBasicBlock(getName() + ".split");
414   VPBlockUtils::insertBlockAfter(SplitBlock, this);
415 
416   // Add successors for block to split to new block.
417   for (VPBlockBase *Succ : Succs)
418     VPBlockUtils::connectBlocks(SplitBlock, Succ);
419 
420   // Finally, move the recipes starting at SplitAt to new block.
421   for (VPRecipeBase &ToMove :
422        make_early_inc_range(make_range(SplitAt, this->end())))
423     ToMove.moveBefore(*SplitBlock, SplitBlock->end());
424 
425   return SplitBlock;
426 }
427 
428 VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
429   VPRegionBlock *P = getParent();
430   if (P && P->isReplicator()) {
431     P = P->getParent();
432     assert(!cast<VPRegionBlock>(P)->isReplicator() &&
433            "unexpected nested replicate regions");
434   }
435   return P;
436 }
437 
438 static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
439   if (VPBB->empty()) {
440     assert(
441         VPBB->getNumSuccessors() < 2 &&
442         "block with multiple successors doesn't have a recipe as terminator");
443     return false;
444   }
445 
446   const VPRecipeBase *R = &VPBB->back();
447   auto *VPI = dyn_cast<VPInstruction>(R);
448   bool IsCondBranch =
449       isa<VPBranchOnMaskRecipe>(R) ||
450       (VPI && (VPI->getOpcode() == VPInstruction::BranchOnCond ||
451                VPI->getOpcode() == VPInstruction::BranchOnCount));
452   (void)IsCondBranch;
453 
454   if (VPBB->getNumSuccessors() >= 2 || VPBB->isExiting()) {
455     assert(IsCondBranch && "block with multiple successors not terminated by "
456                            "conditional branch recipe");
457 
458     return true;
459   }
460 
461   assert(
462       !IsCondBranch &&
463       "block with 0 or 1 successors terminated by conditional branch recipe");
464   return false;
465 }
466 
467 VPRecipeBase *VPBasicBlock::getTerminator() {
468   if (hasConditionalTerminator(this))
469     return &back();
470   return nullptr;
471 }
472 
473 const VPRecipeBase *VPBasicBlock::getTerminator() const {
474   if (hasConditionalTerminator(this))
475     return &back();
476   return nullptr;
477 }
478 
479 bool VPBasicBlock::isExiting() const {
480   return getParent()->getExitingBasicBlock() == this;
481 }
482 
483 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
484 void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
485   if (getSuccessors().empty()) {
486     O << Indent << "No successors\n";
487   } else {
488     O << Indent << "Successor(s): ";
489     ListSeparator LS;
490     for (auto *Succ : getSuccessors())
491       O << LS << Succ->getName();
492     O << '\n';
493   }
494 }
495 
496 void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
497                          VPSlotTracker &SlotTracker) const {
498   O << Indent << getName() << ":\n";
499 
500   auto RecipeIndent = Indent + "  ";
501   for (const VPRecipeBase &Recipe : *this) {
502     Recipe.print(O, RecipeIndent, SlotTracker);
503     O << '\n';
504   }
505 
506   printSuccessors(O, Indent);
507 }
508 #endif
509 
510 void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
511   for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
512     // Drop all references in VPBasicBlocks and replace all uses with
513     // DummyValue.
514     Block->dropAllReferences(NewValue);
515 }
516 
517 void VPRegionBlock::execute(VPTransformState *State) {
518   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
519       RPOT(Entry);
520 
521   if (!isReplicator()) {
522     // Create and register the new vector loop.
523     Loop *PrevLoop = State->CurrentVectorLoop;
524     State->CurrentVectorLoop = State->LI->AllocateLoop();
525     BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
526     Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
527 
528     // Insert the new loop into the loop nest and register the new basic blocks
529     // before calling any utilities such as SCEV that require valid LoopInfo.
530     if (ParentLoop)
531       ParentLoop->addChildLoop(State->CurrentVectorLoop);
532     else
533       State->LI->addTopLevelLoop(State->CurrentVectorLoop);
534 
535     // Visit the VPBlocks connected to "this", starting from it.
536     for (VPBlockBase *Block : RPOT) {
537       LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
538       Block->execute(State);
539     }
540 
541     State->CurrentVectorLoop = PrevLoop;
542     return;
543   }
544 
545   assert(!State->Instance && "Replicating a Region with non-null instance.");
546 
547   // Enter replicating mode.
548   State->Instance = VPIteration(0, 0);
549 
550   for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
551     State->Instance->Part = Part;
552     assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
553     for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
554          ++Lane) {
555       State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
556       // Visit the VPBlocks connected to \p this, starting from it.
557       for (VPBlockBase *Block : RPOT) {
558         LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
559         Block->execute(State);
560       }
561     }
562   }
563 
564   // Exit replicating mode.
565   State->Instance.reset();
566 }
567 
568 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
569 void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
570                           VPSlotTracker &SlotTracker) const {
571   O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
572   auto NewIndent = Indent + "  ";
573   for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
574     O << '\n';
575     BlockBase->print(O, NewIndent, SlotTracker);
576   }
577   O << Indent << "}\n";
578 
579   printSuccessors(O, Indent);
580 }
581 #endif
582 
583 VPlan::~VPlan() {
584   for (auto &KV : LiveOuts)
585     delete KV.second;
586   LiveOuts.clear();
587 
588   if (Entry) {
589     VPValue DummyValue;
590     for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
591       Block->dropAllReferences(&DummyValue);
592 
593     VPBlockBase::deleteCFG(Entry);
594   }
595   for (VPValue *VPV : VPLiveInsToFree)
596     delete VPV;
597   if (BackedgeTakenCount)
598     delete BackedgeTakenCount;
599 }
600 
601 VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE) {
602   VPBasicBlock *Preheader = new VPBasicBlock("ph");
603   VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
604   auto Plan = std::make_unique<VPlan>(Preheader, VecPreheader);
605   Plan->TripCount =
606       vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE);
607   return Plan;
608 }
609 
610 VPActiveLaneMaskPHIRecipe *VPlan::getActiveLaneMaskPhi() {
611   VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
612   for (VPRecipeBase &R : Header->phis()) {
613     if (isa<VPActiveLaneMaskPHIRecipe>(&R))
614       return cast<VPActiveLaneMaskPHIRecipe>(&R);
615   }
616   return nullptr;
617 }
618 
619 void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
620                              Value *CanonicalIVStartValue,
621                              VPTransformState &State,
622                              bool IsEpilogueVectorization) {
623   // Check if the backedge taken count is needed, and if so build it.
624   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
625     IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
626     auto *TCMO = Builder.CreateSub(TripCountV,
627                                    ConstantInt::get(TripCountV->getType(), 1),
628                                    "trip.count.minus.1");
629     auto VF = State.VF;
630     Value *VTCMO =
631         VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
632     for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
633       State.set(BackedgeTakenCount, VTCMO, Part);
634   }
635 
636   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
637     State.set(&VectorTripCount, VectorTripCountV, Part);
638 
639   // When vectorizing the epilogue loop, the canonical induction start value
640   // needs to be changed from zero to the value after the main vector loop.
641   // FIXME: Improve modeling for canonical IV start values in the epilogue loop.
642   if (CanonicalIVStartValue) {
643     VPValue *VPV = getVPValueOrAddLiveIn(CanonicalIVStartValue);
644     auto *IV = getCanonicalIV();
645     assert(all_of(IV->users(),
646                   [](const VPUser *U) {
647                     if (isa<VPScalarIVStepsRecipe>(U) ||
648                         isa<VPDerivedIVRecipe>(U))
649                       return true;
650                     auto *VPI = cast<VPInstruction>(U);
651                     return VPI->getOpcode() ==
652                                VPInstruction::CanonicalIVIncrement ||
653                            VPI->getOpcode() ==
654                                VPInstruction::CanonicalIVIncrementNUW;
655                   }) &&
656            "the canonical IV should only be used by its increments or "
657            "ScalarIVSteps when resetting the start value");
658     IV->setOperand(0, VPV);
659   }
660 }
661 
662 /// Generate the code inside the preheader and body of the vectorized loop.
663 /// Assumes a single pre-header basic-block was created for this. Introduce
664 /// additional basic-blocks as needed, and fill them all.
665 void VPlan::execute(VPTransformState *State) {
666   // Set the reverse mapping from VPValues to Values for code generation.
667   for (auto &Entry : Value2VPValue)
668     State->VPValue2Value[Entry.second] = Entry.first;
669 
670   // Initialize CFG state.
671   State->CFG.PrevVPBB = nullptr;
672   State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
673   BasicBlock *VectorPreHeader = State->CFG.PrevBB;
674   State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
675 
676   // Generate code in the loop pre-header and body.
677   for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
678     Block->execute(State);
679 
680   VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
681   BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
682 
683   // Fix the latch value of canonical, reduction and first-order recurrences
684   // phis in the vector loop.
685   VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
686   for (VPRecipeBase &R : Header->phis()) {
687     // Skip phi-like recipes that generate their backedege values themselves.
688     if (isa<VPWidenPHIRecipe>(&R))
689       continue;
690 
691     if (isa<VPWidenPointerInductionRecipe>(&R) ||
692         isa<VPWidenIntOrFpInductionRecipe>(&R)) {
693       PHINode *Phi = nullptr;
694       if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
695         Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
696       } else {
697         auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
698         // TODO: Split off the case that all users of a pointer phi are scalar
699         // from the VPWidenPointerInductionRecipe.
700         if (WidenPhi->onlyScalarsGenerated(State->VF))
701           continue;
702 
703         auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
704         Phi = cast<PHINode>(GEP->getPointerOperand());
705       }
706 
707       Phi->setIncomingBlock(1, VectorLatchBB);
708 
709       // Move the last step to the end of the latch block. This ensures
710       // consistent placement of all induction updates.
711       Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
712       Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
713       continue;
714     }
715 
716     auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
717     // For  canonical IV, first-order recurrences and in-order reduction phis,
718     // only a single part is generated, which provides the last part from the
719     // previous iteration. For non-ordered reductions all UF parts are
720     // generated.
721     bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
722                             isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
723                             (isa<VPReductionPHIRecipe>(PhiR) &&
724                              cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
725     unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
726 
727     for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
728       Value *Phi = State->get(PhiR, Part);
729       Value *Val = State->get(PhiR->getBackedgeValue(),
730                               SinglePartNeeded ? State->UF - 1 : Part);
731       cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
732     }
733   }
734 
735   // We do not attempt to preserve DT for outer loop vectorization currently.
736   if (!EnableVPlanNativePath) {
737     BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header];
738     State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader);
739     updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB,
740                         State->CFG.ExitBB);
741   }
742 }
743 
744 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
745 LLVM_DUMP_METHOD
746 void VPlan::print(raw_ostream &O) const {
747   VPSlotTracker SlotTracker(this);
748 
749   O << "VPlan '" << getName() << "' {";
750 
751   if (VectorTripCount.getNumUsers() > 0) {
752     O << "\nLive-in ";
753     VectorTripCount.printAsOperand(O, SlotTracker);
754     O << " = vector-trip-count";
755   }
756 
757   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
758     O << "\nLive-in ";
759     BackedgeTakenCount->printAsOperand(O, SlotTracker);
760     O << " = backedge-taken count";
761   }
762 
763   O << "\n";
764   if (TripCount->isLiveIn())
765     O << "Live-in ";
766   TripCount->printAsOperand(O, SlotTracker);
767   O << " = original trip-count";
768   O << "\n";
769 
770   if (!getPreheader()->empty()) {
771     O << "\n";
772     getPreheader()->print(O, "", SlotTracker);
773   }
774 
775   for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) {
776     O << '\n';
777     Block->print(O, "", SlotTracker);
778   }
779 
780   if (!LiveOuts.empty())
781     O << "\n";
782   for (const auto &KV : LiveOuts) {
783     O << "Live-out ";
784     KV.second->getPhi()->printAsOperand(O);
785     O << " = ";
786     KV.second->getOperand(0)->printAsOperand(O, SlotTracker);
787     O << "\n";
788   }
789 
790   O << "}\n";
791 }
792 
793 std::string VPlan::getName() const {
794   std::string Out;
795   raw_string_ostream RSO(Out);
796   RSO << Name << " for ";
797   if (!VFs.empty()) {
798     RSO << "VF={" << VFs[0];
799     for (ElementCount VF : drop_begin(VFs))
800       RSO << "," << VF;
801     RSO << "},";
802   }
803 
804   if (UFs.empty()) {
805     RSO << "UF>=1";
806   } else {
807     RSO << "UF={" << UFs[0];
808     for (unsigned UF : drop_begin(UFs))
809       RSO << "," << UF;
810     RSO << "}";
811   }
812 
813   return Out;
814 }
815 
816 LLVM_DUMP_METHOD
817 void VPlan::printDOT(raw_ostream &O) const {
818   VPlanPrinter Printer(O, *this);
819   Printer.dump();
820 }
821 
822 LLVM_DUMP_METHOD
823 void VPlan::dump() const { print(dbgs()); }
824 #endif
825 
826 void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
827   assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
828   LiveOuts.insert({PN, new VPLiveOut(PN, V)});
829 }
830 
831 void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
832                                 BasicBlock *LoopLatchBB,
833                                 BasicBlock *LoopExitBB) {
834   // The vector body may be more than a single basic-block by this point.
835   // Update the dominator tree information inside the vector body by propagating
836   // it from header to latch, expecting only triangular control-flow, if any.
837   BasicBlock *PostDomSucc = nullptr;
838   for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
839     // Get the list of successors of this block.
840     std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
841     assert(Succs.size() <= 2 &&
842            "Basic block in vector loop has more than 2 successors.");
843     PostDomSucc = Succs[0];
844     if (Succs.size() == 1) {
845       assert(PostDomSucc->getSinglePredecessor() &&
846              "PostDom successor has more than one predecessor.");
847       DT->addNewBlock(PostDomSucc, BB);
848       continue;
849     }
850     BasicBlock *InterimSucc = Succs[1];
851     if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
852       PostDomSucc = Succs[1];
853       InterimSucc = Succs[0];
854     }
855     assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
856            "One successor of a basic block does not lead to the other.");
857     assert(InterimSucc->getSinglePredecessor() &&
858            "Interim successor has more than one predecessor.");
859     assert(PostDomSucc->hasNPredecessors(2) &&
860            "PostDom successor has more than two predecessors.");
861     DT->addNewBlock(InterimSucc, BB);
862     DT->addNewBlock(PostDomSucc, BB);
863   }
864   // Latch block is a new dominator for the loop exit.
865   DT->changeImmediateDominator(LoopExitBB, LoopLatchBB);
866   assert(DT->verify(DominatorTree::VerificationLevel::Fast));
867 }
868 
869 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
870 
871 Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
872   return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
873          Twine(getOrCreateBID(Block));
874 }
875 
876 Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
877   const std::string &Name = Block->getName();
878   if (!Name.empty())
879     return Name;
880   return "VPB" + Twine(getOrCreateBID(Block));
881 }
882 
883 void VPlanPrinter::dump() {
884   Depth = 1;
885   bumpIndent(0);
886   OS << "digraph VPlan {\n";
887   OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
888   if (!Plan.getName().empty())
889     OS << "\\n" << DOT::EscapeString(Plan.getName());
890   if (Plan.BackedgeTakenCount) {
891     OS << ", where:\\n";
892     Plan.BackedgeTakenCount->print(OS, SlotTracker);
893     OS << " := BackedgeTakenCount";
894   }
895   OS << "\"]\n";
896   OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
897   OS << "edge [fontname=Courier, fontsize=30]\n";
898   OS << "compound=true\n";
899 
900   dumpBlock(Plan.getPreheader());
901 
902   for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
903     dumpBlock(Block);
904 
905   OS << "}\n";
906 }
907 
908 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
909   if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
910     dumpBasicBlock(BasicBlock);
911   else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
912     dumpRegion(Region);
913   else
914     llvm_unreachable("Unsupported kind of VPBlock.");
915 }
916 
917 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
918                             bool Hidden, const Twine &Label) {
919   // Due to "dot" we print an edge between two regions as an edge between the
920   // exiting basic block and the entry basic of the respective regions.
921   const VPBlockBase *Tail = From->getExitingBasicBlock();
922   const VPBlockBase *Head = To->getEntryBasicBlock();
923   OS << Indent << getUID(Tail) << " -> " << getUID(Head);
924   OS << " [ label=\"" << Label << '\"';
925   if (Tail != From)
926     OS << " ltail=" << getUID(From);
927   if (Head != To)
928     OS << " lhead=" << getUID(To);
929   if (Hidden)
930     OS << "; splines=none";
931   OS << "]\n";
932 }
933 
934 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
935   auto &Successors = Block->getSuccessors();
936   if (Successors.size() == 1)
937     drawEdge(Block, Successors.front(), false, "");
938   else if (Successors.size() == 2) {
939     drawEdge(Block, Successors.front(), false, "T");
940     drawEdge(Block, Successors.back(), false, "F");
941   } else {
942     unsigned SuccessorNumber = 0;
943     for (auto *Successor : Successors)
944       drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
945   }
946 }
947 
948 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
949   // Implement dot-formatted dump by performing plain-text dump into the
950   // temporary storage followed by some post-processing.
951   OS << Indent << getUID(BasicBlock) << " [label =\n";
952   bumpIndent(1);
953   std::string Str;
954   raw_string_ostream SS(Str);
955   // Use no indentation as we need to wrap the lines into quotes ourselves.
956   BasicBlock->print(SS, "", SlotTracker);
957 
958   // We need to process each line of the output separately, so split
959   // single-string plain-text dump.
960   SmallVector<StringRef, 0> Lines;
961   StringRef(Str).rtrim('\n').split(Lines, "\n");
962 
963   auto EmitLine = [&](StringRef Line, StringRef Suffix) {
964     OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
965   };
966 
967   // Don't need the "+" after the last line.
968   for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
969     EmitLine(Line, " +\n");
970   EmitLine(Lines.back(), "\n");
971 
972   bumpIndent(-1);
973   OS << Indent << "]\n";
974 
975   dumpEdges(BasicBlock);
976 }
977 
978 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
979   OS << Indent << "subgraph " << getUID(Region) << " {\n";
980   bumpIndent(1);
981   OS << Indent << "fontname=Courier\n"
982      << Indent << "label=\""
983      << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
984      << DOT::EscapeString(Region->getName()) << "\"\n";
985   // Dump the blocks of the region.
986   assert(Region->getEntry() && "Region contains no inner blocks.");
987   for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry()))
988     dumpBlock(Block);
989   bumpIndent(-1);
990   OS << Indent << "}\n";
991   dumpEdges(Region);
992 }
993 
994 void VPlanIngredient::print(raw_ostream &O) const {
995   if (auto *Inst = dyn_cast<Instruction>(V)) {
996     if (!Inst->getType()->isVoidTy()) {
997       Inst->printAsOperand(O, false);
998       O << " = ";
999     }
1000     O << Inst->getOpcodeName() << " ";
1001     unsigned E = Inst->getNumOperands();
1002     if (E > 0) {
1003       Inst->getOperand(0)->printAsOperand(O, false);
1004       for (unsigned I = 1; I < E; ++I)
1005         Inst->getOperand(I)->printAsOperand(O << ", ", false);
1006     }
1007   } else // !Inst
1008     V->printAsOperand(O, false);
1009 }
1010 
1011 #endif
1012 
1013 template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
1014 
1015 void VPValue::replaceAllUsesWith(VPValue *New) {
1016   for (unsigned J = 0; J < getNumUsers();) {
1017     VPUser *User = Users[J];
1018     unsigned NumUsers = getNumUsers();
1019     for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
1020       if (User->getOperand(I) == this)
1021         User->setOperand(I, New);
1022     // If a user got removed after updating the current user, the next user to
1023     // update will be moved to the current position, so we only need to
1024     // increment the index if the number of users did not change.
1025     if (NumUsers == getNumUsers())
1026       J++;
1027   }
1028 }
1029 
1030 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1031 void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
1032   if (const Value *UV = getUnderlyingValue()) {
1033     OS << "ir<";
1034     UV->printAsOperand(OS, false);
1035     OS << ">";
1036     return;
1037   }
1038 
1039   unsigned Slot = Tracker.getSlot(this);
1040   if (Slot == unsigned(-1))
1041     OS << "<badref>";
1042   else
1043     OS << "vp<%" << Tracker.getSlot(this) << ">";
1044 }
1045 
1046 void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
1047   interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1048     Op->printAsOperand(O, SlotTracker);
1049   });
1050 }
1051 #endif
1052 
1053 void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
1054                                           Old2NewTy &Old2New,
1055                                           InterleavedAccessInfo &IAI) {
1056   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
1057       RPOT(Region->getEntry());
1058   for (VPBlockBase *Base : RPOT) {
1059     visitBlock(Base, Old2New, IAI);
1060   }
1061 }
1062 
1063 void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1064                                          InterleavedAccessInfo &IAI) {
1065   if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
1066     for (VPRecipeBase &VPI : *VPBB) {
1067       if (isa<VPHeaderPHIRecipe>(&VPI))
1068         continue;
1069       assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
1070       auto *VPInst = cast<VPInstruction>(&VPI);
1071 
1072       auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
1073       if (!Inst)
1074         continue;
1075       auto *IG = IAI.getInterleaveGroup(Inst);
1076       if (!IG)
1077         continue;
1078 
1079       auto NewIGIter = Old2New.find(IG);
1080       if (NewIGIter == Old2New.end())
1081         Old2New[IG] = new InterleaveGroup<VPInstruction>(
1082             IG->getFactor(), IG->isReverse(), IG->getAlign());
1083 
1084       if (Inst == IG->getInsertPos())
1085         Old2New[IG]->setInsertPos(VPInst);
1086 
1087       InterleaveGroupMap[VPInst] = Old2New[IG];
1088       InterleaveGroupMap[VPInst]->insertMember(
1089           VPInst, IG->getIndex(Inst),
1090           Align(IG->isReverse() ? (-1) * int(IG->getFactor())
1091                                 : IG->getFactor()));
1092     }
1093   } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1094     visitRegion(Region, Old2New, IAI);
1095   else
1096     llvm_unreachable("Unsupported kind of VPBlock.");
1097 }
1098 
1099 VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
1100                                                  InterleavedAccessInfo &IAI) {
1101   Old2NewTy Old2New;
1102   visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
1103 }
1104 
1105 void VPSlotTracker::assignSlot(const VPValue *V) {
1106   assert(!Slots.contains(V) && "VPValue already has a slot!");
1107   Slots[V] = NextSlot++;
1108 }
1109 
1110 void VPSlotTracker::assignSlots(const VPlan &Plan) {
1111   assignSlot(&Plan.VectorTripCount);
1112   if (Plan.BackedgeTakenCount)
1113     assignSlot(Plan.BackedgeTakenCount);
1114   assignSlots(Plan.getPreheader());
1115 
1116   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1117       RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
1118   for (const VPBasicBlock *VPBB :
1119        VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
1120     assignSlots(VPBB);
1121 }
1122 
1123 void VPSlotTracker::assignSlots(const VPBasicBlock *VPBB) {
1124   for (const VPRecipeBase &Recipe : *VPBB)
1125     for (VPValue *Def : Recipe.definedValues())
1126       assignSlot(Def);
1127 }
1128 
1129 bool vputils::onlyFirstLaneUsed(VPValue *Def) {
1130   return all_of(Def->users(),
1131                 [Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); });
1132 }
1133 
1134 VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
1135                                                 ScalarEvolution &SE) {
1136   if (auto *Expanded = Plan.getSCEVExpansion(Expr))
1137     return Expanded;
1138   VPValue *Expanded = nullptr;
1139   if (auto *E = dyn_cast<SCEVConstant>(Expr))
1140     Expanded = Plan.getVPValueOrAddLiveIn(E->getValue());
1141   else if (auto *E = dyn_cast<SCEVUnknown>(Expr))
1142     Expanded = Plan.getVPValueOrAddLiveIn(E->getValue());
1143   else {
1144     Expanded = new VPExpandSCEVRecipe(Expr, SE);
1145     Plan.getPreheader()->appendRecipe(Expanded->getDefiningRecipe());
1146   }
1147   Plan.addSCEVExpansion(Expr, Expanded);
1148   return Expanded;
1149 }
1150