xref: /llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp (revision f25089237376dd43c8c37a18ea9d132f0845eda4)
1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This is the LLVM vectorization plan. It represents a candidate for
11 /// vectorization, allowing to plan and optimize how to vectorize a given loop
12 /// before generating LLVM-IR.
13 /// The vectorizer uses vectorization plans to estimate the costs of potential
14 /// candidates and if profitable to execute the desired plan, generating vector
15 /// LLVM-IR code.
16 ///
17 //===----------------------------------------------------------------------===//
18 
19 #include "VPlan.h"
20 #include "VPlanDominatorTree.h"
21 #include "llvm/ADT/DepthFirstIterator.h"
22 #include "llvm/ADT/PostOrderIterator.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Twine.h"
26 #include "llvm/Analysis/IVDescriptors.h"
27 #include "llvm/Analysis/LoopInfo.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/CFG.h"
30 #include "llvm/IR/InstrTypes.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/GenericDomTreeConstruction.h"
40 #include "llvm/Support/GraphWriter.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
43 #include <cassert>
44 #include <iterator>
45 #include <string>
46 #include <vector>
47 
48 using namespace llvm;
49 extern cl::opt<bool> EnableVPlanNativePath;
50 
51 #define DEBUG_TYPE "vplan"
52 
53 raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
54   const VPInstruction *Instr = dyn_cast<VPInstruction>(&V);
55   VPSlotTracker SlotTracker(
56       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
57   V.print(OS, SlotTracker);
58   return OS;
59 }
60 
61 VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
62     : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
63   if (Def)
64     Def->addDefinedValue(this);
65 }
66 
67 VPValue::~VPValue() {
68   assert(Users.empty() && "trying to delete a VPValue with remaining users");
69   if (Def)
70     Def->removeDefinedValue(this);
71 }
72 
73 void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
74   if (const VPInstruction *Instr = dyn_cast<VPInstruction>(this))
75     Instr->print(OS, SlotTracker);
76   else
77     printAsOperand(OS, SlotTracker);
78 }
79 
80 void VPValue::dump() const {
81   const VPInstruction *Instr = dyn_cast<VPInstruction>(this);
82   VPSlotTracker SlotTracker(
83       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
84   print(dbgs(), SlotTracker);
85   dbgs() << "\n";
86 }
87 
88 void VPRecipeBase::dump() const {
89   VPSlotTracker SlotTracker(nullptr);
90   print(dbgs(), "", SlotTracker);
91   dbgs() << "\n";
92 }
93 
94 VPUser *VPRecipeBase::toVPUser() {
95   if (auto *U = dyn_cast<VPInstruction>(this))
96     return U;
97   if (auto *U = dyn_cast<VPWidenRecipe>(this))
98     return U;
99   if (auto *U = dyn_cast<VPWidenCallRecipe>(this))
100     return U;
101   if (auto *U = dyn_cast<VPWidenSelectRecipe>(this))
102     return U;
103   if (auto *U = dyn_cast<VPWidenGEPRecipe>(this))
104     return U;
105   if (auto *U = dyn_cast<VPBlendRecipe>(this))
106     return U;
107   if (auto *U = dyn_cast<VPInterleaveRecipe>(this))
108     return U;
109   if (auto *U = dyn_cast<VPReplicateRecipe>(this))
110     return U;
111   if (auto *U = dyn_cast<VPBranchOnMaskRecipe>(this))
112     return U;
113   if (auto *U = dyn_cast<VPWidenMemoryInstructionRecipe>(this))
114     return U;
115   if (auto *U = dyn_cast<VPReductionRecipe>(this))
116     return U;
117   if (auto *U = dyn_cast<VPPredInstPHIRecipe>(this))
118     return U;
119   return nullptr;
120 }
121 
122 VPValue *VPRecipeBase::toVPValue() {
123   if (getNumDefinedValues() == 1)
124     return getVPValue();
125   if (auto *V = dyn_cast<VPInstruction>(this))
126     return V;
127   return nullptr;
128 }
129 
130 const VPValue *VPRecipeBase::toVPValue() const {
131   if (getNumDefinedValues() == 1)
132     return getVPValue();
133   if (auto *V = dyn_cast<VPInstruction>(this))
134     return V;
135   return nullptr;
136 }
137 
138 // Get the top-most entry block of \p Start. This is the entry block of the
139 // containing VPlan. This function is templated to support both const and non-const blocks
140 template <typename T> static T *getPlanEntry(T *Start) {
141   T *Next = Start;
142   T *Current = Start;
143   while ((Next = Next->getParent()))
144     Current = Next;
145 
146   SmallSetVector<T *, 8> WorkList;
147   WorkList.insert(Current);
148 
149   for (unsigned i = 0; i < WorkList.size(); i++) {
150     T *Current = WorkList[i];
151     if (Current->getNumPredecessors() == 0)
152       return Current;
153     auto &Predecessors = Current->getPredecessors();
154     WorkList.insert(Predecessors.begin(), Predecessors.end());
155   }
156 
157   llvm_unreachable("VPlan without any entry node without predecessors");
158 }
159 
160 VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
161 
162 const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
163 
164 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
165 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
166   const VPBlockBase *Block = this;
167   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
168     Block = Region->getEntry();
169   return cast<VPBasicBlock>(Block);
170 }
171 
172 VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
173   VPBlockBase *Block = this;
174   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
175     Block = Region->getEntry();
176   return cast<VPBasicBlock>(Block);
177 }
178 
179 void VPBlockBase::setPlan(VPlan *ParentPlan) {
180   assert(ParentPlan->getEntry() == this &&
181          "Can only set plan on its entry block.");
182   Plan = ParentPlan;
183 }
184 
185 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
186 const VPBasicBlock *VPBlockBase::getExitBasicBlock() const {
187   const VPBlockBase *Block = this;
188   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
189     Block = Region->getExit();
190   return cast<VPBasicBlock>(Block);
191 }
192 
193 VPBasicBlock *VPBlockBase::getExitBasicBlock() {
194   VPBlockBase *Block = this;
195   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
196     Block = Region->getExit();
197   return cast<VPBasicBlock>(Block);
198 }
199 
200 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
201   if (!Successors.empty() || !Parent)
202     return this;
203   assert(Parent->getExit() == this &&
204          "Block w/o successors not the exit of its parent.");
205   return Parent->getEnclosingBlockWithSuccessors();
206 }
207 
208 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
209   if (!Predecessors.empty() || !Parent)
210     return this;
211   assert(Parent->getEntry() == this &&
212          "Block w/o predecessors not the entry of its parent.");
213   return Parent->getEnclosingBlockWithPredecessors();
214 }
215 
216 void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
217   SmallVector<VPBlockBase *, 8> Blocks;
218 
219   for (VPBlockBase *Block : depth_first(Entry))
220     Blocks.push_back(Block);
221 
222   for (VPBlockBase *Block : Blocks)
223     delete Block;
224 }
225 
226 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
227   iterator It = begin();
228   while (It != end() && (isa<VPWidenPHIRecipe>(&*It) ||
229                          isa<VPWidenIntOrFpInductionRecipe>(&*It) ||
230                          isa<VPPredInstPHIRecipe>(&*It) ||
231                          isa<VPWidenCanonicalIVRecipe>(&*It)))
232     It++;
233   return It;
234 }
235 
236 BasicBlock *
237 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
238   // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
239   // Pred stands for Predessor. Prev stands for Previous - last visited/created.
240   BasicBlock *PrevBB = CFG.PrevBB;
241   BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
242                                          PrevBB->getParent(), CFG.LastBB);
243   LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
244 
245   // Hook up the new basic block to its predecessors.
246   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
247     VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock();
248     auto &PredVPSuccessors = PredVPBB->getSuccessors();
249     BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
250 
251     // In outer loop vectorization scenario, the predecessor BBlock may not yet
252     // be visited(backedge). Mark the VPBasicBlock for fixup at the end of
253     // vectorization. We do not encounter this case in inner loop vectorization
254     // as we start out by building a loop skeleton with the vector loop header
255     // and latch blocks. As a result, we never enter this function for the
256     // header block in the non VPlan-native path.
257     if (!PredBB) {
258       assert(EnableVPlanNativePath &&
259              "Unexpected null predecessor in non VPlan-native path");
260       CFG.VPBBsToFix.push_back(PredVPBB);
261       continue;
262     }
263 
264     assert(PredBB && "Predecessor basic-block not found building successor.");
265     auto *PredBBTerminator = PredBB->getTerminator();
266     LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
267     if (isa<UnreachableInst>(PredBBTerminator)) {
268       assert(PredVPSuccessors.size() == 1 &&
269              "Predecessor ending w/o branch must have single successor.");
270       PredBBTerminator->eraseFromParent();
271       BranchInst::Create(NewBB, PredBB);
272     } else {
273       assert(PredVPSuccessors.size() == 2 &&
274              "Predecessor ending with branch must have two successors.");
275       unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
276       assert(!PredBBTerminator->getSuccessor(idx) &&
277              "Trying to reset an existing successor block.");
278       PredBBTerminator->setSuccessor(idx, NewBB);
279     }
280   }
281   return NewBB;
282 }
283 
284 void VPBasicBlock::execute(VPTransformState *State) {
285   bool Replica = State->Instance &&
286                  !(State->Instance->Part == 0 && State->Instance->Lane == 0);
287   VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
288   VPBlockBase *SingleHPred = nullptr;
289   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
290 
291   // 1. Create an IR basic block, or reuse the last one if possible.
292   // The last IR basic block is reused, as an optimization, in three cases:
293   // A. the first VPBB reuses the loop header BB - when PrevVPBB is null;
294   // B. when the current VPBB has a single (hierarchical) predecessor which
295   //    is PrevVPBB and the latter has a single (hierarchical) successor; and
296   // C. when the current VPBB is an entry of a region replica - where PrevVPBB
297   //    is the exit of this region from a previous instance, or the predecessor
298   //    of this region.
299   if (PrevVPBB && /* A */
300       !((SingleHPred = getSingleHierarchicalPredecessor()) &&
301         SingleHPred->getExitBasicBlock() == PrevVPBB &&
302         PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */
303       !(Replica && getPredecessors().empty())) {       /* C */
304     NewBB = createEmptyBasicBlock(State->CFG);
305     State->Builder.SetInsertPoint(NewBB);
306     // Temporarily terminate with unreachable until CFG is rewired.
307     UnreachableInst *Terminator = State->Builder.CreateUnreachable();
308     State->Builder.SetInsertPoint(Terminator);
309     // Register NewBB in its loop. In innermost loops its the same for all BB's.
310     Loop *L = State->LI->getLoopFor(State->CFG.LastBB);
311     L->addBasicBlockToLoop(NewBB, *State->LI);
312     State->CFG.PrevBB = NewBB;
313   }
314 
315   // 2. Fill the IR basic block with IR instructions.
316   LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
317                     << " in BB:" << NewBB->getName() << '\n');
318 
319   State->CFG.VPBB2IRBB[this] = NewBB;
320   State->CFG.PrevVPBB = this;
321 
322   for (VPRecipeBase &Recipe : Recipes)
323     Recipe.execute(*State);
324 
325   VPValue *CBV;
326   if (EnableVPlanNativePath && (CBV = getCondBit())) {
327     Value *IRCBV = CBV->getUnderlyingValue();
328     assert(IRCBV && "Unexpected null underlying value for condition bit");
329 
330     // Condition bit value in a VPBasicBlock is used as the branch selector. In
331     // the VPlan-native path case, since all branches are uniform we generate a
332     // branch instruction using the condition value from vector lane 0 and dummy
333     // successors. The successors are fixed later when the successor blocks are
334     // visited.
335     Value *NewCond = State->Callback.getOrCreateVectorValues(IRCBV, 0);
336     NewCond = State->Builder.CreateExtractElement(NewCond,
337                                                   State->Builder.getInt32(0));
338 
339     // Replace the temporary unreachable terminator with the new conditional
340     // branch.
341     auto *CurrentTerminator = NewBB->getTerminator();
342     assert(isa<UnreachableInst>(CurrentTerminator) &&
343            "Expected to replace unreachable terminator with conditional "
344            "branch.");
345     auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond);
346     CondBr->setSuccessor(0, nullptr);
347     ReplaceInstWithInst(CurrentTerminator, CondBr);
348   }
349 
350   LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
351 }
352 
353 void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
354   for (VPRecipeBase &R : Recipes) {
355     if (VPValue *Def = R.toVPValue())
356       Def->replaceAllUsesWith(NewValue);
357     else if (auto *IR = dyn_cast<VPInterleaveRecipe>(&R)) {
358       for (auto *Def : IR->definedValues())
359         Def->replaceAllUsesWith(NewValue);
360     }
361 
362     if (auto *User = R.toVPUser())
363       for (unsigned I = 0, E = User->getNumOperands(); I != E; I++)
364         User->setOperand(I, NewValue);
365   }
366 }
367 
368 void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
369   for (VPBlockBase *Block : depth_first(Entry))
370     // Drop all references in VPBasicBlocks and replace all uses with
371     // DummyValue.
372     Block->dropAllReferences(NewValue);
373 }
374 
375 void VPRegionBlock::execute(VPTransformState *State) {
376   ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);
377 
378   if (!isReplicator()) {
379     // Visit the VPBlocks connected to "this", starting from it.
380     for (VPBlockBase *Block : RPOT) {
381       if (EnableVPlanNativePath) {
382         // The inner loop vectorization path does not represent loop preheader
383         // and exit blocks as part of the VPlan. In the VPlan-native path, skip
384         // vectorizing loop preheader block. In future, we may replace this
385         // check with the check for loop preheader.
386         if (Block->getNumPredecessors() == 0)
387           continue;
388 
389         // Skip vectorizing loop exit block. In future, we may replace this
390         // check with the check for loop exit.
391         if (Block->getNumSuccessors() == 0)
392           continue;
393       }
394 
395       LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
396       Block->execute(State);
397     }
398     return;
399   }
400 
401   assert(!State->Instance && "Replicating a Region with non-null instance.");
402 
403   // Enter replicating mode.
404   State->Instance = {0, 0};
405 
406   for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
407     State->Instance->Part = Part;
408     assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
409     for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
410          ++Lane) {
411       State->Instance->Lane = Lane;
412       // Visit the VPBlocks connected to \p this, starting from it.
413       for (VPBlockBase *Block : RPOT) {
414         LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
415         Block->execute(State);
416       }
417     }
418   }
419 
420   // Exit replicating mode.
421   State->Instance.reset();
422 }
423 
424 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
425   assert(!Parent && "Recipe already in some VPBasicBlock");
426   assert(InsertPos->getParent() &&
427          "Insertion position not in any VPBasicBlock");
428   Parent = InsertPos->getParent();
429   Parent->getRecipeList().insert(InsertPos->getIterator(), this);
430 }
431 
432 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
433   assert(!Parent && "Recipe already in some VPBasicBlock");
434   assert(InsertPos->getParent() &&
435          "Insertion position not in any VPBasicBlock");
436   Parent = InsertPos->getParent();
437   Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
438 }
439 
440 void VPRecipeBase::removeFromParent() {
441   assert(getParent() && "Recipe not in any VPBasicBlock");
442   getParent()->getRecipeList().remove(getIterator());
443   Parent = nullptr;
444 }
445 
446 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
447   assert(getParent() && "Recipe not in any VPBasicBlock");
448   return getParent()->getRecipeList().erase(getIterator());
449 }
450 
451 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
452   removeFromParent();
453   insertAfter(InsertPos);
454 }
455 
456 void VPInstruction::generateInstruction(VPTransformState &State,
457                                         unsigned Part) {
458   IRBuilder<> &Builder = State.Builder;
459 
460   if (Instruction::isBinaryOp(getOpcode())) {
461     Value *A = State.get(getOperand(0), Part);
462     Value *B = State.get(getOperand(1), Part);
463     Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
464     State.set(this, V, Part);
465     return;
466   }
467 
468   switch (getOpcode()) {
469   case VPInstruction::Not: {
470     Value *A = State.get(getOperand(0), Part);
471     Value *V = Builder.CreateNot(A);
472     State.set(this, V, Part);
473     break;
474   }
475   case VPInstruction::ICmpULE: {
476     Value *IV = State.get(getOperand(0), Part);
477     Value *TC = State.get(getOperand(1), Part);
478     Value *V = Builder.CreateICmpULE(IV, TC);
479     State.set(this, V, Part);
480     break;
481   }
482   case Instruction::Select: {
483     Value *Cond = State.get(getOperand(0), Part);
484     Value *Op1 = State.get(getOperand(1), Part);
485     Value *Op2 = State.get(getOperand(2), Part);
486     Value *V = Builder.CreateSelect(Cond, Op1, Op2);
487     State.set(this, V, Part);
488     break;
489   }
490   case VPInstruction::ActiveLaneMask: {
491     // Get first lane of vector induction variable.
492     Value *VIVElem0 = State.get(getOperand(0), {Part, 0});
493     // Get the original loop tripcount.
494     Value *ScalarTC = State.TripCount;
495 
496     auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
497     auto *PredTy = FixedVectorType::get(Int1Ty, State.VF.getKnownMinValue());
498     Instruction *Call = Builder.CreateIntrinsic(
499         Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
500         {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
501     State.set(this, Call, Part);
502     break;
503   }
504   default:
505     llvm_unreachable("Unsupported opcode for instruction");
506   }
507 }
508 
509 void VPInstruction::execute(VPTransformState &State) {
510   assert(!State.Instance && "VPInstruction executing an Instance");
511   for (unsigned Part = 0; Part < State.UF; ++Part)
512     generateInstruction(State, Part);
513 }
514 
515 void VPInstruction::print(raw_ostream &O, const Twine &Indent,
516                           VPSlotTracker &SlotTracker) const {
517   O << "\"EMIT ";
518   print(O, SlotTracker);
519 }
520 
521 void VPInstruction::print(raw_ostream &O) const {
522   VPSlotTracker SlotTracker(getParent()->getPlan());
523   print(O, SlotTracker);
524 }
525 
526 void VPInstruction::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
527   if (hasResult()) {
528     printAsOperand(O, SlotTracker);
529     O << " = ";
530   }
531 
532   switch (getOpcode()) {
533   case VPInstruction::Not:
534     O << "not";
535     break;
536   case VPInstruction::ICmpULE:
537     O << "icmp ule";
538     break;
539   case VPInstruction::SLPLoad:
540     O << "combined load";
541     break;
542   case VPInstruction::SLPStore:
543     O << "combined store";
544     break;
545   case VPInstruction::ActiveLaneMask:
546     O << "active lane mask";
547     break;
548 
549   default:
550     O << Instruction::getOpcodeName(getOpcode());
551   }
552 
553   for (const VPValue *Operand : operands()) {
554     O << " ";
555     Operand->printAsOperand(O, SlotTracker);
556   }
557 }
558 
559 /// Generate the code inside the body of the vectorized loop. Assumes a single
560 /// LoopVectorBody basic-block was created for this. Introduce additional
561 /// basic-blocks as needed, and fill them all.
562 void VPlan::execute(VPTransformState *State) {
563   // -1. Check if the backedge taken count is needed, and if so build it.
564   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
565     Value *TC = State->TripCount;
566     IRBuilder<> Builder(State->CFG.PrevBB->getTerminator());
567     auto *TCMO = Builder.CreateSub(TC, ConstantInt::get(TC->getType(), 1),
568                                    "trip.count.minus.1");
569     auto VF = State->VF;
570     Value *VTCMO =
571         VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
572     for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part)
573       State->set(BackedgeTakenCount, VTCMO, Part);
574   }
575 
576   // 0. Set the reverse mapping from VPValues to Values for code generation.
577   for (auto &Entry : Value2VPValue)
578     State->VPValue2Value[Entry.second] = Entry.first;
579 
580   BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
581   BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
582   assert(VectorHeaderBB && "Loop preheader does not have a single successor.");
583 
584   // 1. Make room to generate basic-blocks inside loop body if needed.
585   BasicBlock *VectorLatchBB = VectorHeaderBB->splitBasicBlock(
586       VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch");
587   Loop *L = State->LI->getLoopFor(VectorHeaderBB);
588   L->addBasicBlockToLoop(VectorLatchBB, *State->LI);
589   // Remove the edge between Header and Latch to allow other connections.
590   // Temporarily terminate with unreachable until CFG is rewired.
591   // Note: this asserts the generated code's assumption that
592   // getFirstInsertionPt() can be dereferenced into an Instruction.
593   VectorHeaderBB->getTerminator()->eraseFromParent();
594   State->Builder.SetInsertPoint(VectorHeaderBB);
595   UnreachableInst *Terminator = State->Builder.CreateUnreachable();
596   State->Builder.SetInsertPoint(Terminator);
597 
598   // 2. Generate code in loop body.
599   State->CFG.PrevVPBB = nullptr;
600   State->CFG.PrevBB = VectorHeaderBB;
601   State->CFG.LastBB = VectorLatchBB;
602 
603   for (VPBlockBase *Block : depth_first(Entry))
604     Block->execute(State);
605 
606   // Setup branch terminator successors for VPBBs in VPBBsToFix based on
607   // VPBB's successors.
608   for (auto VPBB : State->CFG.VPBBsToFix) {
609     assert(EnableVPlanNativePath &&
610            "Unexpected VPBBsToFix in non VPlan-native path");
611     BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
612     assert(BB && "Unexpected null basic block for VPBB");
613 
614     unsigned Idx = 0;
615     auto *BBTerminator = BB->getTerminator();
616 
617     for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) {
618       VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock();
619       BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]);
620       ++Idx;
621     }
622   }
623 
624   // 3. Merge the temporary latch created with the last basic-block filled.
625   BasicBlock *LastBB = State->CFG.PrevBB;
626   // Connect LastBB to VectorLatchBB to facilitate their merge.
627   assert((EnableVPlanNativePath ||
628           isa<UnreachableInst>(LastBB->getTerminator())) &&
629          "Expected InnerLoop VPlan CFG to terminate with unreachable");
630   assert((!EnableVPlanNativePath || isa<BranchInst>(LastBB->getTerminator())) &&
631          "Expected VPlan CFG to terminate with branch in NativePath");
632   LastBB->getTerminator()->eraseFromParent();
633   BranchInst::Create(VectorLatchBB, LastBB);
634 
635   // Merge LastBB with Latch.
636   bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI);
637   (void)Merged;
638   assert(Merged && "Could not merge last basic block with latch.");
639   VectorLatchBB = LastBB;
640 
641   // We do not attempt to preserve DT for outer loop vectorization currently.
642   if (!EnableVPlanNativePath)
643     updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB,
644                         L->getExitBlock());
645 }
646 
647 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
648 LLVM_DUMP_METHOD
649 void VPlan::dump() const { dbgs() << *this << '\n'; }
650 #endif
651 
652 void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
653                                 BasicBlock *LoopLatchBB,
654                                 BasicBlock *LoopExitBB) {
655   BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor();
656   assert(LoopHeaderBB && "Loop preheader does not have a single successor.");
657   // The vector body may be more than a single basic-block by this point.
658   // Update the dominator tree information inside the vector body by propagating
659   // it from header to latch, expecting only triangular control-flow, if any.
660   BasicBlock *PostDomSucc = nullptr;
661   for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
662     // Get the list of successors of this block.
663     std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
664     assert(Succs.size() <= 2 &&
665            "Basic block in vector loop has more than 2 successors.");
666     PostDomSucc = Succs[0];
667     if (Succs.size() == 1) {
668       assert(PostDomSucc->getSinglePredecessor() &&
669              "PostDom successor has more than one predecessor.");
670       DT->addNewBlock(PostDomSucc, BB);
671       continue;
672     }
673     BasicBlock *InterimSucc = Succs[1];
674     if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
675       PostDomSucc = Succs[1];
676       InterimSucc = Succs[0];
677     }
678     assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
679            "One successor of a basic block does not lead to the other.");
680     assert(InterimSucc->getSinglePredecessor() &&
681            "Interim successor has more than one predecessor.");
682     assert(PostDomSucc->hasNPredecessors(2) &&
683            "PostDom successor has more than two predecessors.");
684     DT->addNewBlock(InterimSucc, BB);
685     DT->addNewBlock(PostDomSucc, BB);
686   }
687   // Latch block is a new dominator for the loop exit.
688   DT->changeImmediateDominator(LoopExitBB, LoopLatchBB);
689   assert(DT->verify(DominatorTree::VerificationLevel::Fast));
690 }
691 
692 const Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
693   return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
694          Twine(getOrCreateBID(Block));
695 }
696 
697 const Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
698   const std::string &Name = Block->getName();
699   if (!Name.empty())
700     return Name;
701   return "VPB" + Twine(getOrCreateBID(Block));
702 }
703 
704 void VPlanPrinter::dump() {
705   Depth = 1;
706   bumpIndent(0);
707   OS << "digraph VPlan {\n";
708   OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
709   if (!Plan.getName().empty())
710     OS << "\\n" << DOT::EscapeString(Plan.getName());
711   if (Plan.BackedgeTakenCount) {
712     OS << ", where:\\n";
713     Plan.BackedgeTakenCount->print(OS, SlotTracker);
714     OS << " := BackedgeTakenCount";
715   }
716   OS << "\"]\n";
717   OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
718   OS << "edge [fontname=Courier, fontsize=30]\n";
719   OS << "compound=true\n";
720 
721   for (const VPBlockBase *Block : depth_first(Plan.getEntry()))
722     dumpBlock(Block);
723 
724   OS << "}\n";
725 }
726 
727 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
728   if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
729     dumpBasicBlock(BasicBlock);
730   else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
731     dumpRegion(Region);
732   else
733     llvm_unreachable("Unsupported kind of VPBlock.");
734 }
735 
736 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
737                             bool Hidden, const Twine &Label) {
738   // Due to "dot" we print an edge between two regions as an edge between the
739   // exit basic block and the entry basic of the respective regions.
740   const VPBlockBase *Tail = From->getExitBasicBlock();
741   const VPBlockBase *Head = To->getEntryBasicBlock();
742   OS << Indent << getUID(Tail) << " -> " << getUID(Head);
743   OS << " [ label=\"" << Label << '\"';
744   if (Tail != From)
745     OS << " ltail=" << getUID(From);
746   if (Head != To)
747     OS << " lhead=" << getUID(To);
748   if (Hidden)
749     OS << "; splines=none";
750   OS << "]\n";
751 }
752 
753 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
754   auto &Successors = Block->getSuccessors();
755   if (Successors.size() == 1)
756     drawEdge(Block, Successors.front(), false, "");
757   else if (Successors.size() == 2) {
758     drawEdge(Block, Successors.front(), false, "T");
759     drawEdge(Block, Successors.back(), false, "F");
760   } else {
761     unsigned SuccessorNumber = 0;
762     for (auto *Successor : Successors)
763       drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
764   }
765 }
766 
767 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
768   OS << Indent << getUID(BasicBlock) << " [label =\n";
769   bumpIndent(1);
770   OS << Indent << "\"" << DOT::EscapeString(BasicBlock->getName()) << ":\\n\"";
771   bumpIndent(1);
772 
773   // Dump the block predicate.
774   const VPValue *Pred = BasicBlock->getPredicate();
775   if (Pred) {
776     OS << " +\n" << Indent << " \"BlockPredicate: ";
777     if (const VPInstruction *PredI = dyn_cast<VPInstruction>(Pred)) {
778       PredI->printAsOperand(OS, SlotTracker);
779       OS << " (" << DOT::EscapeString(PredI->getParent()->getName())
780          << ")\\l\"";
781     } else
782       Pred->printAsOperand(OS, SlotTracker);
783   }
784 
785   for (const VPRecipeBase &Recipe : *BasicBlock) {
786     OS << " +\n" << Indent;
787     Recipe.print(OS, Indent, SlotTracker);
788     OS << "\\l\"";
789   }
790 
791   // Dump the condition bit.
792   const VPValue *CBV = BasicBlock->getCondBit();
793   if (CBV) {
794     OS << " +\n" << Indent << " \"CondBit: ";
795     if (const VPInstruction *CBI = dyn_cast<VPInstruction>(CBV)) {
796       CBI->printAsOperand(OS, SlotTracker);
797       OS << " (" << DOT::EscapeString(CBI->getParent()->getName()) << ")\\l\"";
798     } else {
799       CBV->printAsOperand(OS, SlotTracker);
800       OS << "\"";
801     }
802   }
803 
804   bumpIndent(-2);
805   OS << "\n" << Indent << "]\n";
806   dumpEdges(BasicBlock);
807 }
808 
809 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
810   OS << Indent << "subgraph " << getUID(Region) << " {\n";
811   bumpIndent(1);
812   OS << Indent << "fontname=Courier\n"
813      << Indent << "label=\""
814      << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
815      << DOT::EscapeString(Region->getName()) << "\"\n";
816   // Dump the blocks of the region.
817   assert(Region->getEntry() && "Region contains no inner blocks.");
818   for (const VPBlockBase *Block : depth_first(Region->getEntry()))
819     dumpBlock(Block);
820   bumpIndent(-1);
821   OS << Indent << "}\n";
822   dumpEdges(Region);
823 }
824 
825 void VPlanPrinter::printAsIngredient(raw_ostream &O, const Value *V) {
826   std::string IngredientString;
827   raw_string_ostream RSO(IngredientString);
828   if (auto *Inst = dyn_cast<Instruction>(V)) {
829     if (!Inst->getType()->isVoidTy()) {
830       Inst->printAsOperand(RSO, false);
831       RSO << " = ";
832     }
833     RSO << Inst->getOpcodeName() << " ";
834     unsigned E = Inst->getNumOperands();
835     if (E > 0) {
836       Inst->getOperand(0)->printAsOperand(RSO, false);
837       for (unsigned I = 1; I < E; ++I)
838         Inst->getOperand(I)->printAsOperand(RSO << ", ", false);
839     }
840   } else // !Inst
841     V->printAsOperand(RSO, false);
842   RSO.flush();
843   O << DOT::EscapeString(IngredientString);
844 }
845 
846 void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
847                               VPSlotTracker &SlotTracker) const {
848   O << "\"WIDEN-CALL ";
849 
850   auto *CI = cast<CallInst>(getUnderlyingInstr());
851   if (CI->getType()->isVoidTy())
852     O << "void ";
853   else {
854     printAsOperand(O, SlotTracker);
855     O << " = ";
856   }
857 
858   O << "call @" << CI->getCalledFunction()->getName() << "(";
859   printOperands(O, SlotTracker);
860   O << ")";
861 }
862 
863 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
864                                 VPSlotTracker &SlotTracker) const {
865   O << "\"WIDEN-SELECT ";
866   printAsOperand(O, SlotTracker);
867   O << " = select ";
868   getOperand(0)->printAsOperand(O, SlotTracker);
869   O << ", ";
870   getOperand(1)->printAsOperand(O, SlotTracker);
871   O << ", ";
872   getOperand(2)->printAsOperand(O, SlotTracker);
873   O << (InvariantCond ? " (condition is loop invariant)" : "");
874 }
875 
876 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
877                           VPSlotTracker &SlotTracker) const {
878   O << "\"WIDEN ";
879   printAsOperand(O, SlotTracker);
880   O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
881   printOperands(O, SlotTracker);
882 }
883 
884 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
885                                           VPSlotTracker &SlotTracker) const {
886   O << "\"WIDEN-INDUCTION";
887   if (Trunc) {
888     O << "\\l\"";
889     O << " +\n" << Indent << "\"  " << VPlanIngredient(IV) << "\\l\"";
890     O << " +\n" << Indent << "\"  " << VPlanIngredient(Trunc);
891   } else
892     O << " " << VPlanIngredient(IV);
893 }
894 
895 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
896                              VPSlotTracker &SlotTracker) const {
897   O << "\"WIDEN-GEP ";
898   O << (IsPtrLoopInvariant ? "Inv" : "Var");
899   size_t IndicesNumber = IsIndexLoopInvariant.size();
900   for (size_t I = 0; I < IndicesNumber; ++I)
901     O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
902 
903   O << " ";
904   printAsOperand(O, SlotTracker);
905   O << " = getelementptr ";
906   printOperands(O, SlotTracker);
907 }
908 
909 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
910                              VPSlotTracker &SlotTracker) const {
911   O << "\"WIDEN-PHI " << VPlanIngredient(Phi);
912 }
913 
914 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
915                           VPSlotTracker &SlotTracker) const {
916   O << "\"BLEND ";
917   Phi->printAsOperand(O, false);
918   O << " =";
919   if (getNumIncomingValues() == 1) {
920     // Not a User of any mask: not really blending, this is a
921     // single-predecessor phi.
922     O << " ";
923     getIncomingValue(0)->printAsOperand(O, SlotTracker);
924   } else {
925     for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
926       O << " ";
927       getIncomingValue(I)->printAsOperand(O, SlotTracker);
928       O << "/";
929       getMask(I)->printAsOperand(O, SlotTracker);
930     }
931   }
932 }
933 
934 void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
935                               VPSlotTracker &SlotTracker) const {
936   O << "\"REDUCE ";
937   printAsOperand(O, SlotTracker);
938   O << " = ";
939   getChainOp()->printAsOperand(O, SlotTracker);
940   O << " + reduce." << Instruction::getOpcodeName(RdxDesc->getRecurrenceBinOp())
941     << " (";
942   getVecOp()->printAsOperand(O, SlotTracker);
943   if (getCondOp()) {
944     O << ", ";
945     getCondOp()->printAsOperand(O, SlotTracker);
946   }
947   O << ")";
948 }
949 
950 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
951                               VPSlotTracker &SlotTracker) const {
952   O << "\"" << (IsUniform ? "CLONE " : "REPLICATE ");
953 
954   if (!getUnderlyingInstr()->getType()->isVoidTy()) {
955     printAsOperand(O, SlotTracker);
956     O << " = ";
957   }
958   O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
959   printOperands(O, SlotTracker);
960 
961   if (AlsoPack)
962     O << " (S->V)";
963 }
964 
965 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
966                                 VPSlotTracker &SlotTracker) const {
967   O << "\"PHI-PREDICATED-INSTRUCTION ";
968   printOperands(O, SlotTracker);
969 }
970 
971 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
972                                            VPSlotTracker &SlotTracker) const {
973   O << "\"WIDEN ";
974 
975   if (!isStore()) {
976     getVPValue()->printAsOperand(O, SlotTracker);
977     O << " = ";
978   }
979   O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
980 
981   printOperands(O, SlotTracker);
982 }
983 
984 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
985   Value *CanonicalIV = State.CanonicalIV;
986   Type *STy = CanonicalIV->getType();
987   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
988   ElementCount VF = State.VF;
989   assert(!VF.isScalable() && "the code following assumes non scalables ECs");
990   Value *VStart = VF.isScalar()
991                       ? CanonicalIV
992                       : Builder.CreateVectorSplat(VF.getKnownMinValue(),
993                                                   CanonicalIV, "broadcast");
994   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
995     SmallVector<Constant *, 8> Indices;
996     for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
997       Indices.push_back(
998           ConstantInt::get(STy, Part * VF.getKnownMinValue() + Lane));
999     // If VF == 1, there is only one iteration in the loop above, thus the
1000     // element pushed back into Indices is ConstantInt::get(STy, Part)
1001     Constant *VStep =
1002         VF.isScalar() ? Indices.back() : ConstantVector::get(Indices);
1003     // Add the consecutive indices to the vector value.
1004     Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
1005     State.set(getVPValue(), CanonicalVectorIV, Part);
1006   }
1007 }
1008 
1009 void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
1010                                      VPSlotTracker &SlotTracker) const {
1011   O << "\"EMIT ";
1012   getVPValue()->printAsOperand(O, SlotTracker);
1013   O << " = WIDEN-CANONICAL-INDUCTION";
1014 }
1015 
1016 template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
1017 
1018 void VPValue::replaceAllUsesWith(VPValue *New) {
1019   for (unsigned J = 0; J < getNumUsers();) {
1020     VPUser *User = Users[J];
1021     unsigned NumUsers = getNumUsers();
1022     for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
1023       if (User->getOperand(I) == this)
1024         User->setOperand(I, New);
1025     // If a user got removed after updating the current user, the next user to
1026     // update will be moved to the current position, so we only need to
1027     // increment the index if the number of users did not change.
1028     if (NumUsers == getNumUsers())
1029       J++;
1030   }
1031 }
1032 
1033 void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
1034   if (const Value *UV = getUnderlyingValue()) {
1035     OS << "ir<";
1036     UV->printAsOperand(OS, false);
1037     OS << ">";
1038     return;
1039   }
1040 
1041   unsigned Slot = Tracker.getSlot(this);
1042   if (Slot == unsigned(-1))
1043     OS << "<badref>";
1044   else
1045     OS << "vp<%" << Tracker.getSlot(this) << ">";
1046 }
1047 
1048 void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
1049   interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1050     Op->printAsOperand(O, SlotTracker);
1051   });
1052 }
1053 
1054 void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
1055                                           Old2NewTy &Old2New,
1056                                           InterleavedAccessInfo &IAI) {
1057   ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
1058   for (VPBlockBase *Base : RPOT) {
1059     visitBlock(Base, Old2New, IAI);
1060   }
1061 }
1062 
1063 void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1064                                          InterleavedAccessInfo &IAI) {
1065   if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
1066     for (VPRecipeBase &VPI : *VPBB) {
1067       assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
1068       auto *VPInst = cast<VPInstruction>(&VPI);
1069       auto *Inst = cast<Instruction>(VPInst->getUnderlyingValue());
1070       auto *IG = IAI.getInterleaveGroup(Inst);
1071       if (!IG)
1072         continue;
1073 
1074       auto NewIGIter = Old2New.find(IG);
1075       if (NewIGIter == Old2New.end())
1076         Old2New[IG] = new InterleaveGroup<VPInstruction>(
1077             IG->getFactor(), IG->isReverse(), IG->getAlign());
1078 
1079       if (Inst == IG->getInsertPos())
1080         Old2New[IG]->setInsertPos(VPInst);
1081 
1082       InterleaveGroupMap[VPInst] = Old2New[IG];
1083       InterleaveGroupMap[VPInst]->insertMember(
1084           VPInst, IG->getIndex(Inst),
1085           Align(IG->isReverse() ? (-1) * int(IG->getFactor())
1086                                 : IG->getFactor()));
1087     }
1088   } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1089     visitRegion(Region, Old2New, IAI);
1090   else
1091     llvm_unreachable("Unsupported kind of VPBlock.");
1092 }
1093 
1094 VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
1095                                                  InterleavedAccessInfo &IAI) {
1096   Old2NewTy Old2New;
1097   visitRegion(cast<VPRegionBlock>(Plan.getEntry()), Old2New, IAI);
1098 }
1099 
1100 void VPSlotTracker::assignSlot(const VPValue *V) {
1101   assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!");
1102   const Value *UV = V->getUnderlyingValue();
1103   if (UV)
1104     return;
1105   const auto *VPI = dyn_cast<VPInstruction>(V);
1106   if (VPI && !VPI->hasResult())
1107     return;
1108 
1109   Slots[V] = NextSlot++;
1110 }
1111 
1112 void VPSlotTracker::assignSlots(const VPBlockBase *VPBB) {
1113   if (auto *Region = dyn_cast<VPRegionBlock>(VPBB))
1114     assignSlots(Region);
1115   else
1116     assignSlots(cast<VPBasicBlock>(VPBB));
1117 }
1118 
1119 void VPSlotTracker::assignSlots(const VPRegionBlock *Region) {
1120   ReversePostOrderTraversal<const VPBlockBase *> RPOT(Region->getEntry());
1121   for (const VPBlockBase *Block : RPOT)
1122     assignSlots(Block);
1123 }
1124 
1125 void VPSlotTracker::assignSlots(const VPBasicBlock *VPBB) {
1126   for (const VPRecipeBase &Recipe : *VPBB) {
1127     if (const auto *VPI = dyn_cast<VPInstruction>(&Recipe))
1128       assignSlot(VPI);
1129     else if (const auto *VPIV = dyn_cast<VPWidenCanonicalIVRecipe>(&Recipe))
1130       assignSlot(VPIV->getVPValue());
1131   }
1132 }
1133 
1134 void VPSlotTracker::assignSlots(const VPlan &Plan) {
1135 
1136   for (const VPValue *V : Plan.VPExternalDefs)
1137     assignSlot(V);
1138 
1139   for (auto &E : Plan.Value2VPValue)
1140     if (!isa<VPInstruction>(E.second))
1141       assignSlot(E.second);
1142 
1143   for (const VPValue *V : Plan.VPCBVs)
1144     assignSlot(V);
1145 
1146   if (Plan.BackedgeTakenCount)
1147     assignSlot(Plan.BackedgeTakenCount);
1148 
1149   ReversePostOrderTraversal<const VPBlockBase *> RPOT(Plan.getEntry());
1150   for (const VPBlockBase *Block : RPOT)
1151     assignSlots(Block);
1152 }
1153