xref: /llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp (revision b021464d35ca9569d430d968034e21c1e592cbb9)
1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This is the LLVM vectorization plan. It represents a candidate for
11 /// vectorization, allowing to plan and optimize how to vectorize a given loop
12 /// before generating LLVM-IR.
13 /// The vectorizer uses vectorization plans to estimate the costs of potential
14 /// candidates and if profitable to execute the desired plan, generating vector
15 /// LLVM-IR code.
16 ///
17 //===----------------------------------------------------------------------===//
18 
19 #include "VPlan.h"
20 #include "LoopVectorizationPlanner.h"
21 #include "VPlanCFG.h"
22 #include "VPlanDominatorTree.h"
23 #include "VPlanPatternMatch.h"
24 #include "VPlanTransforms.h"
25 #include "VPlanUtils.h"
26 #include "llvm/ADT/PostOrderIterator.h"
27 #include "llvm/ADT/STLExtras.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/ADT/Twine.h"
31 #include "llvm/Analysis/DomTreeUpdater.h"
32 #include "llvm/Analysis/LoopInfo.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/CFG.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/Instruction.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/IR/Value.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/CommandLine.h"
42 #include "llvm/Support/Debug.h"
43 #include "llvm/Support/GraphWriter.h"
44 #include "llvm/Support/raw_ostream.h"
45 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
46 #include "llvm/Transforms/Utils/LoopVersioning.h"
47 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
48 #include <cassert>
49 #include <string>
50 #include <vector>
51 
52 using namespace llvm;
53 using namespace llvm::VPlanPatternMatch;
54 
55 namespace llvm {
56 extern cl::opt<bool> EnableVPlanNativePath;
57 }
58 extern cl::opt<unsigned> ForceTargetInstructionCost;
59 
60 static cl::opt<bool> PrintVPlansInDotFormat(
61     "vplan-print-in-dot-format", cl::Hidden,
62     cl::desc("Use dot format instead of plain text when dumping VPlans"));
63 
64 #define DEBUG_TYPE "vplan"
65 
66 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
67 raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
68   const VPInstruction *Instr = dyn_cast<VPInstruction>(&V);
69   VPSlotTracker SlotTracker(
70       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
71   V.print(OS, SlotTracker);
72   return OS;
73 }
74 #endif
75 
76 Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder,
77                                 const ElementCount &VF) const {
78   switch (LaneKind) {
79   case VPLane::Kind::ScalableLast:
80     // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
81     return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
82                              Builder.getInt32(VF.getKnownMinValue() - Lane));
83   case VPLane::Kind::First:
84     return Builder.getInt32(Lane);
85   }
86   llvm_unreachable("Unknown lane kind");
87 }
88 
89 VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
90     : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
91   if (Def)
92     Def->addDefinedValue(this);
93 }
94 
95 VPValue::~VPValue() {
96   assert(Users.empty() && "trying to delete a VPValue with remaining users");
97   if (Def)
98     Def->removeDefinedValue(this);
99 }
100 
101 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
102 void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
103   if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
104     R->print(OS, "", SlotTracker);
105   else
106     printAsOperand(OS, SlotTracker);
107 }
108 
109 void VPValue::dump() const {
110   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
111   VPSlotTracker SlotTracker(
112       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
113   print(dbgs(), SlotTracker);
114   dbgs() << "\n";
115 }
116 
117 void VPDef::dump() const {
118   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
119   VPSlotTracker SlotTracker(
120       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
121   print(dbgs(), "", SlotTracker);
122   dbgs() << "\n";
123 }
124 #endif
125 
126 VPRecipeBase *VPValue::getDefiningRecipe() {
127   return cast_or_null<VPRecipeBase>(Def);
128 }
129 
130 const VPRecipeBase *VPValue::getDefiningRecipe() const {
131   return cast_or_null<VPRecipeBase>(Def);
132 }
133 
134 // Get the top-most entry block of \p Start. This is the entry block of the
135 // containing VPlan. This function is templated to support both const and non-const blocks
136 template <typename T> static T *getPlanEntry(T *Start) {
137   T *Next = Start;
138   T *Current = Start;
139   while ((Next = Next->getParent()))
140     Current = Next;
141 
142   SmallSetVector<T *, 8> WorkList;
143   WorkList.insert(Current);
144 
145   for (unsigned i = 0; i < WorkList.size(); i++) {
146     T *Current = WorkList[i];
147     if (Current->getNumPredecessors() == 0)
148       return Current;
149     auto &Predecessors = Current->getPredecessors();
150     WorkList.insert(Predecessors.begin(), Predecessors.end());
151   }
152 
153   llvm_unreachable("VPlan without any entry node without predecessors");
154 }
155 
156 VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
157 
158 const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
159 
160 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
161 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
162   const VPBlockBase *Block = this;
163   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
164     Block = Region->getEntry();
165   return cast<VPBasicBlock>(Block);
166 }
167 
168 VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
169   VPBlockBase *Block = this;
170   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
171     Block = Region->getEntry();
172   return cast<VPBasicBlock>(Block);
173 }
174 
175 void VPBlockBase::setPlan(VPlan *ParentPlan) {
176   assert(
177       (ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) &&
178       "Can only set plan on its entry or preheader block.");
179   Plan = ParentPlan;
180 }
181 
182 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
183 const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const {
184   const VPBlockBase *Block = this;
185   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
186     Block = Region->getExiting();
187   return cast<VPBasicBlock>(Block);
188 }
189 
190 VPBasicBlock *VPBlockBase::getExitingBasicBlock() {
191   VPBlockBase *Block = this;
192   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
193     Block = Region->getExiting();
194   return cast<VPBasicBlock>(Block);
195 }
196 
197 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
198   if (!Successors.empty() || !Parent)
199     return this;
200   assert(Parent->getExiting() == this &&
201          "Block w/o successors not the exiting block of its parent.");
202   return Parent->getEnclosingBlockWithSuccessors();
203 }
204 
205 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
206   if (!Predecessors.empty() || !Parent)
207     return this;
208   assert(Parent->getEntry() == this &&
209          "Block w/o predecessors not the entry of its parent.");
210   return Parent->getEnclosingBlockWithPredecessors();
211 }
212 
213 void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
214   for (VPBlockBase *Block : to_vector(vp_depth_first_shallow(Entry)))
215     delete Block;
216 }
217 
218 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
219   iterator It = begin();
220   while (It != end() && It->isPhi())
221     It++;
222   return It;
223 }
224 
225 VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
226                                    DominatorTree *DT, IRBuilderBase &Builder,
227                                    InnerLoopVectorizer *ILV, VPlan *Plan)
228     : VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
229       LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType()) {}
230 
231 Value *VPTransformState::get(VPValue *Def, const VPLane &Lane) {
232   if (Def->isLiveIn())
233     return Def->getLiveInIRValue();
234 
235   if (hasScalarValue(Def, Lane))
236     return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];
237 
238   if (!Lane.isFirstLane() && vputils::isUniformAfterVectorization(Def) &&
239       hasScalarValue(Def, VPLane::getFirstLane())) {
240     return Data.VPV2Scalars[Def][0];
241   }
242 
243   assert(hasVectorValue(Def));
244   auto *VecPart = Data.VPV2Vector[Def];
245   if (!VecPart->getType()->isVectorTy()) {
246     assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
247     return VecPart;
248   }
249   // TODO: Cache created scalar values.
250   Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF);
251   auto *Extract = Builder.CreateExtractElement(VecPart, LaneV);
252   // set(Def, Extract, Instance);
253   return Extract;
254 }
255 
256 Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
257   if (NeedsScalar) {
258     assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def) ||
259             !vputils::onlyFirstLaneUsed(Def) ||
260             (hasScalarValue(Def, VPLane(0)) &&
261              Data.VPV2Scalars[Def].size() == 1)) &&
262            "Trying to access a single scalar per part but has multiple scalars "
263            "per part.");
264     return get(Def, VPLane(0));
265   }
266 
267   // If Values have been set for this Def return the one relevant for \p Part.
268   if (hasVectorValue(Def))
269     return Data.VPV2Vector[Def];
270 
271   auto GetBroadcastInstrs = [this, Def](Value *V) {
272     bool SafeToHoist = Def->isDefinedOutsideLoopRegions();
273     if (VF.isScalar())
274       return V;
275     // Place the code for broadcasting invariant variables in the new preheader.
276     IRBuilder<>::InsertPointGuard Guard(Builder);
277     if (SafeToHoist) {
278       BasicBlock *LoopVectorPreHeader =
279           CFG.VPBB2IRBB[Plan->getVectorPreheader()];
280       if (LoopVectorPreHeader)
281         Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
282     }
283 
284     // Place the code for broadcasting invariant variables in the new preheader.
285     // Broadcast the scalar into all locations in the vector.
286     Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
287 
288     return Shuf;
289   };
290 
291   if (!hasScalarValue(Def, {0})) {
292     assert(Def->isLiveIn() && "expected a live-in");
293     Value *IRV = Def->getLiveInIRValue();
294     Value *B = GetBroadcastInstrs(IRV);
295     set(Def, B);
296     return B;
297   }
298 
299   Value *ScalarValue = get(Def, VPLane(0));
300   // If we aren't vectorizing, we can just copy the scalar map values over
301   // to the vector map.
302   if (VF.isScalar()) {
303     set(Def, ScalarValue);
304     return ScalarValue;
305   }
306 
307   bool IsUniform = vputils::isUniformAfterVectorization(Def);
308 
309   VPLane LastLane(IsUniform ? 0 : VF.getKnownMinValue() - 1);
310   // Check if there is a scalar value for the selected lane.
311   if (!hasScalarValue(Def, LastLane)) {
312     // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
313     // VPExpandSCEVRecipes can also be uniform.
314     assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDefiningRecipe()) ||
315             isa<VPScalarIVStepsRecipe>(Def->getDefiningRecipe()) ||
316             isa<VPExpandSCEVRecipe>(Def->getDefiningRecipe())) &&
317            "unexpected recipe found to be invariant");
318     IsUniform = true;
319     LastLane = 0;
320   }
321 
322   auto *LastInst = cast<Instruction>(get(Def, LastLane));
323   // Set the insert point after the last scalarized instruction or after the
324   // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
325   // will directly follow the scalar definitions.
326   auto OldIP = Builder.saveIP();
327   auto NewIP =
328       isa<PHINode>(LastInst)
329           ? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
330           : std::next(BasicBlock::iterator(LastInst));
331   Builder.SetInsertPoint(&*NewIP);
332 
333   // However, if we are vectorizing, we need to construct the vector values.
334   // If the value is known to be uniform after vectorization, we can just
335   // broadcast the scalar value corresponding to lane zero. Otherwise, we
336   // construct the vector values using insertelement instructions. Since the
337   // resulting vectors are stored in State, we will only generate the
338   // insertelements once.
339   Value *VectorValue = nullptr;
340   if (IsUniform) {
341     VectorValue = GetBroadcastInstrs(ScalarValue);
342     set(Def, VectorValue);
343   } else {
344     // Initialize packing with insertelements to start from undef.
345     assert(!VF.isScalable() && "VF is assumed to be non scalable.");
346     Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF));
347     set(Def, Undef);
348     for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
349       packScalarIntoVectorValue(Def, Lane);
350     VectorValue = get(Def);
351   }
352   Builder.restoreIP(OldIP);
353   return VectorValue;
354 }
355 
356 BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
357   VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
358   return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
359 }
360 
361 void VPTransformState::addNewMetadata(Instruction *To,
362                                       const Instruction *Orig) {
363   // If the loop was versioned with memchecks, add the corresponding no-alias
364   // metadata.
365   if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig)))
366     LVer->annotateInstWithNoAlias(To, Orig);
367 }
368 
369 void VPTransformState::addMetadata(Value *To, Instruction *From) {
370   // No source instruction to transfer metadata from?
371   if (!From)
372     return;
373 
374   if (Instruction *ToI = dyn_cast<Instruction>(To)) {
375     propagateMetadata(ToI, From);
376     addNewMetadata(ToI, From);
377   }
378 }
379 
380 void VPTransformState::setDebugLocFrom(DebugLoc DL) {
381   const DILocation *DIL = DL;
382   // When a FSDiscriminator is enabled, we don't need to add the multiply
383   // factors to the discriminators.
384   if (DIL &&
385       Builder.GetInsertBlock()
386           ->getParent()
387           ->shouldEmitDebugInfoForProfiling() &&
388       !EnableFSDiscriminator) {
389     // FIXME: For scalable vectors, assume vscale=1.
390     unsigned UF = Plan->getUF();
391     auto NewDIL =
392         DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
393     if (NewDIL)
394       Builder.SetCurrentDebugLocation(*NewDIL);
395     else
396       LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
397                         << DIL->getFilename() << " Line: " << DIL->getLine());
398   } else
399     Builder.SetCurrentDebugLocation(DIL);
400 }
401 
402 void VPTransformState::packScalarIntoVectorValue(VPValue *Def,
403                                                  const VPLane &Lane) {
404   Value *ScalarInst = get(Def, Lane);
405   Value *VectorValue = get(Def);
406   VectorValue = Builder.CreateInsertElement(VectorValue, ScalarInst,
407                                             Lane.getAsRuntimeExpr(Builder, VF));
408   set(Def, VectorValue);
409 }
410 
411 BasicBlock *
412 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
413   // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
414   // Pred stands for Predessor. Prev stands for Previous - last visited/created.
415   BasicBlock *PrevBB = CFG.PrevBB;
416   BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
417                                          PrevBB->getParent(), CFG.ExitBB);
418   LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
419 
420   // Hook up the new basic block to its predecessors.
421   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
422     VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
423     auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
424     BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
425 
426     assert(PredBB && "Predecessor basic-block not found building successor.");
427     auto *PredBBTerminator = PredBB->getTerminator();
428     LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
429 
430     auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
431     if (isa<UnreachableInst>(PredBBTerminator)) {
432       assert(PredVPSuccessors.size() == 1 &&
433              "Predecessor ending w/o branch must have single successor.");
434       DebugLoc DL = PredBBTerminator->getDebugLoc();
435       PredBBTerminator->eraseFromParent();
436       auto *Br = BranchInst::Create(NewBB, PredBB);
437       Br->setDebugLoc(DL);
438     } else if (TermBr && !TermBr->isConditional()) {
439       TermBr->setSuccessor(0, NewBB);
440     } else {
441       // Set each forward successor here when it is created, excluding
442       // backedges. A backward successor is set when the branch is created.
443       unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
444       assert(!TermBr->getSuccessor(idx) &&
445              "Trying to reset an existing successor block.");
446       TermBr->setSuccessor(idx, NewBB);
447     }
448     CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}});
449   }
450   return NewBB;
451 }
452 
453 void VPIRBasicBlock::execute(VPTransformState *State) {
454   assert(getHierarchicalSuccessors().size() <= 2 &&
455          "VPIRBasicBlock can have at most two successors at the moment!");
456   State->Builder.SetInsertPoint(IRBB->getTerminator());
457   executeRecipes(State, IRBB);
458   // Create a branch instruction to terminate IRBB if one was not created yet
459   // and is needed.
460   if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
461     auto *Br = State->Builder.CreateBr(IRBB);
462     Br->setOperand(0, nullptr);
463     IRBB->getTerminator()->eraseFromParent();
464   } else {
465     assert(
466         (getNumSuccessors() == 0 || isa<BranchInst>(IRBB->getTerminator())) &&
467         "other blocks must be terminated by a branch");
468   }
469 
470   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
471     VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
472     BasicBlock *PredBB = State->CFG.VPBB2IRBB[PredVPBB];
473     assert(PredBB && "Predecessor basic-block not found building successor.");
474     LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
475 
476     auto *PredBBTerminator = PredBB->getTerminator();
477     auto *TermBr = cast<BranchInst>(PredBBTerminator);
478     // Set each forward successor here when it is created, excluding
479     // backedges. A backward successor is set when the branch is created.
480     const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
481     unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
482     assert((!TermBr->getSuccessor(idx) || TermBr->getSuccessor(idx) == IRBB) &&
483            "Trying to reset an existing successor block.");
484     TermBr->setSuccessor(idx, IRBB);
485     State->CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, IRBB}});
486   }
487 }
488 
489 void VPBasicBlock::execute(VPTransformState *State) {
490   bool Replica = bool(State->Lane);
491   VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
492   VPBlockBase *SingleHPred = nullptr;
493   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
494 
495   auto IsLoopRegion = [](VPBlockBase *BB) {
496     auto *R = dyn_cast<VPRegionBlock>(BB);
497     return R && !R->isReplicator();
498   };
499 
500   // 1. Create an IR basic block.
501   if (PrevVPBB && /* A */
502       !((SingleHPred = getSingleHierarchicalPredecessor()) &&
503         SingleHPred->getExitingBasicBlock() == PrevVPBB &&
504         PrevVPBB->getSingleHierarchicalSuccessor() &&
505         (SingleHPred->getParent() == getEnclosingLoopRegion() &&
506          !IsLoopRegion(SingleHPred))) &&         /* B */
507       !(Replica && getPredecessors().empty())) { /* C */
508     // The last IR basic block is reused, as an optimization, in three cases:
509     // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
510     // B. when the current VPBB has a single (hierarchical) predecessor which
511     //    is PrevVPBB and the latter has a single (hierarchical) successor which
512     //    both are in the same non-replicator region; and
513     // C. when the current VPBB is an entry of a region replica - where PrevVPBB
514     //    is the exiting VPBB of this region from a previous instance, or the
515     //    predecessor of this region.
516 
517     NewBB = createEmptyBasicBlock(State->CFG);
518     State->Builder.SetInsertPoint(NewBB);
519     // Temporarily terminate with unreachable until CFG is rewired.
520     UnreachableInst *Terminator = State->Builder.CreateUnreachable();
521     // Register NewBB in its loop. In innermost loops its the same for all
522     // BB's.
523     if (State->CurrentVectorLoop)
524       State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
525     State->Builder.SetInsertPoint(Terminator);
526     State->CFG.PrevBB = NewBB;
527   }
528 
529   // 2. Fill the IR basic block with IR instructions.
530   executeRecipes(State, NewBB);
531 }
532 
533 void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
534   for (VPRecipeBase &R : Recipes) {
535     for (auto *Def : R.definedValues())
536       Def->replaceAllUsesWith(NewValue);
537 
538     for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
539       R.setOperand(I, NewValue);
540   }
541 }
542 
543 void VPBasicBlock::executeRecipes(VPTransformState *State, BasicBlock *BB) {
544   LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
545                     << " in BB:" << BB->getName() << '\n');
546 
547   State->CFG.VPBB2IRBB[this] = BB;
548   State->CFG.PrevVPBB = this;
549 
550   for (VPRecipeBase &Recipe : Recipes)
551     Recipe.execute(*State);
552 
553   LLVM_DEBUG(dbgs() << "LV: filled BB:" << *BB);
554 }
555 
556 VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
557   assert((SplitAt == end() || SplitAt->getParent() == this) &&
558          "can only split at a position in the same block");
559 
560   SmallVector<VPBlockBase *, 2> Succs(successors());
561   // First, disconnect the current block from its successors.
562   for (VPBlockBase *Succ : Succs)
563     VPBlockUtils::disconnectBlocks(this, Succ);
564 
565   // Create new empty block after the block to split.
566   auto *SplitBlock = new VPBasicBlock(getName() + ".split");
567   VPBlockUtils::insertBlockAfter(SplitBlock, this);
568 
569   // Add successors for block to split to new block.
570   for (VPBlockBase *Succ : Succs)
571     VPBlockUtils::connectBlocks(SplitBlock, Succ);
572 
573   // Finally, move the recipes starting at SplitAt to new block.
574   for (VPRecipeBase &ToMove :
575        make_early_inc_range(make_range(SplitAt, this->end())))
576     ToMove.moveBefore(*SplitBlock, SplitBlock->end());
577 
578   return SplitBlock;
579 }
580 
581 /// Return the enclosing loop region for region \p P. The templated version is
582 /// used to support both const and non-const block arguments.
583 template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) {
584   if (P && P->isReplicator()) {
585     P = P->getParent();
586     assert(!cast<VPRegionBlock>(P)->isReplicator() &&
587            "unexpected nested replicate regions");
588   }
589   return P;
590 }
591 
592 VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
593   return getEnclosingLoopRegionForRegion(getParent());
594 }
595 
596 const VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() const {
597   return getEnclosingLoopRegionForRegion(getParent());
598 }
599 
600 static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
601   if (VPBB->empty()) {
602     assert(
603         VPBB->getNumSuccessors() < 2 &&
604         "block with multiple successors doesn't have a recipe as terminator");
605     return false;
606   }
607 
608   const VPRecipeBase *R = &VPBB->back();
609   bool IsCondBranch = isa<VPBranchOnMaskRecipe>(R) ||
610                       match(R, m_BranchOnCond(m_VPValue())) ||
611                       match(R, m_BranchOnCount(m_VPValue(), m_VPValue()));
612   (void)IsCondBranch;
613 
614   if (VPBB->getNumSuccessors() >= 2 ||
615       (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) {
616     assert(IsCondBranch && "block with multiple successors not terminated by "
617                            "conditional branch recipe");
618 
619     return true;
620   }
621 
622   assert(
623       !IsCondBranch &&
624       "block with 0 or 1 successors terminated by conditional branch recipe");
625   return false;
626 }
627 
628 VPRecipeBase *VPBasicBlock::getTerminator() {
629   if (hasConditionalTerminator(this))
630     return &back();
631   return nullptr;
632 }
633 
634 const VPRecipeBase *VPBasicBlock::getTerminator() const {
635   if (hasConditionalTerminator(this))
636     return &back();
637   return nullptr;
638 }
639 
640 bool VPBasicBlock::isExiting() const {
641   return getParent() && getParent()->getExitingBasicBlock() == this;
642 }
643 
644 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
645 void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
646   if (getSuccessors().empty()) {
647     O << Indent << "No successors\n";
648   } else {
649     O << Indent << "Successor(s): ";
650     ListSeparator LS;
651     for (auto *Succ : getSuccessors())
652       O << LS << Succ->getName();
653     O << '\n';
654   }
655 }
656 
657 void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
658                          VPSlotTracker &SlotTracker) const {
659   O << Indent << getName() << ":\n";
660 
661   auto RecipeIndent = Indent + "  ";
662   for (const VPRecipeBase &Recipe : *this) {
663     Recipe.print(O, RecipeIndent, SlotTracker);
664     O << '\n';
665   }
666 
667   printSuccessors(O, Indent);
668 }
669 #endif
670 
671 static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry);
672 
673 // Clone the CFG for all nodes reachable from \p Entry, this includes cloning
674 // the blocks and their recipes. Operands of cloned recipes will NOT be updated.
675 // Remapping of operands must be done separately. Returns a pair with the new
676 // entry and exiting blocks of the cloned region. If \p Entry isn't part of a
677 // region, return nullptr for the exiting block.
678 static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
679   DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks;
680   VPBlockBase *Exiting = nullptr;
681   bool InRegion = Entry->getParent();
682   // First, clone blocks reachable from Entry.
683   for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
684     VPBlockBase *NewBB = BB->clone();
685     Old2NewVPBlocks[BB] = NewBB;
686     if (InRegion && BB->getNumSuccessors() == 0) {
687       assert(!Exiting && "Multiple exiting blocks?");
688       Exiting = BB;
689     }
690   }
691   assert((!InRegion || Exiting) && "regions must have a single exiting block");
692 
693   // Second, update the predecessors & successors of the cloned blocks.
694   for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
695     VPBlockBase *NewBB = Old2NewVPBlocks[BB];
696     SmallVector<VPBlockBase *> NewPreds;
697     for (VPBlockBase *Pred : BB->getPredecessors()) {
698       NewPreds.push_back(Old2NewVPBlocks[Pred]);
699     }
700     NewBB->setPredecessors(NewPreds);
701     SmallVector<VPBlockBase *> NewSuccs;
702     for (VPBlockBase *Succ : BB->successors()) {
703       NewSuccs.push_back(Old2NewVPBlocks[Succ]);
704     }
705     NewBB->setSuccessors(NewSuccs);
706   }
707 
708 #if !defined(NDEBUG)
709   // Verify that the order of predecessors and successors matches in the cloned
710   // version.
711   for (const auto &[OldBB, NewBB] :
712        zip(vp_depth_first_shallow(Entry),
713            vp_depth_first_shallow(Old2NewVPBlocks[Entry]))) {
714     for (const auto &[OldPred, NewPred] :
715          zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
716       assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
717 
718     for (const auto &[OldSucc, NewSucc] :
719          zip(OldBB->successors(), NewBB->successors()))
720       assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
721   }
722 #endif
723 
724   return std::make_pair(Old2NewVPBlocks[Entry],
725                         Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
726 }
727 
728 VPRegionBlock *VPRegionBlock::clone() {
729   const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
730   auto *NewRegion =
731       new VPRegionBlock(NewEntry, NewExiting, getName(), isReplicator());
732   for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
733     Block->setParent(NewRegion);
734   return NewRegion;
735 }
736 
737 void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
738   for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
739     // Drop all references in VPBasicBlocks and replace all uses with
740     // DummyValue.
741     Block->dropAllReferences(NewValue);
742 }
743 
744 void VPRegionBlock::execute(VPTransformState *State) {
745   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
746       RPOT(Entry);
747 
748   if (!isReplicator()) {
749     // Create and register the new vector loop.
750     Loop *PrevLoop = State->CurrentVectorLoop;
751     State->CurrentVectorLoop = State->LI->AllocateLoop();
752     BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
753     Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
754 
755     // Insert the new loop into the loop nest and register the new basic blocks
756     // before calling any utilities such as SCEV that require valid LoopInfo.
757     if (ParentLoop)
758       ParentLoop->addChildLoop(State->CurrentVectorLoop);
759     else
760       State->LI->addTopLevelLoop(State->CurrentVectorLoop);
761 
762     // Visit the VPBlocks connected to "this", starting from it.
763     for (VPBlockBase *Block : RPOT) {
764       LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
765       Block->execute(State);
766     }
767 
768     State->CurrentVectorLoop = PrevLoop;
769     return;
770   }
771 
772   assert(!State->Lane && "Replicating a Region with non-null instance.");
773 
774   // Enter replicating mode.
775   assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
776   State->Lane = VPLane(0);
777   for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
778        ++Lane) {
779     State->Lane = VPLane(Lane, VPLane::Kind::First);
780     // Visit the VPBlocks connected to \p this, starting from it.
781     for (VPBlockBase *Block : RPOT) {
782       LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
783       Block->execute(State);
784     }
785   }
786 
787   // Exit replicating mode.
788   State->Lane.reset();
789 }
790 
791 InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) {
792   InstructionCost Cost = 0;
793   for (VPRecipeBase &R : Recipes)
794     Cost += R.cost(VF, Ctx);
795   return Cost;
796 }
797 
798 InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) {
799   if (!isReplicator()) {
800     InstructionCost Cost = 0;
801     for (VPBlockBase *Block : vp_depth_first_shallow(getEntry()))
802       Cost += Block->cost(VF, Ctx);
803     InstructionCost BackedgeCost =
804         ForceTargetInstructionCost.getNumOccurrences()
805             ? InstructionCost(ForceTargetInstructionCost.getNumOccurrences())
806             : Ctx.TTI.getCFInstrCost(Instruction::Br, TTI::TCK_RecipThroughput);
807     LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
808                       << ": vector loop backedge\n");
809     Cost += BackedgeCost;
810     return Cost;
811   }
812 
813   // Compute the cost of a replicate region. Replicating isn't supported for
814   // scalable vectors, return an invalid cost for them.
815   // TODO: Discard scalable VPlans with replicate recipes earlier after
816   // construction.
817   if (VF.isScalable())
818     return InstructionCost::getInvalid();
819 
820   // First compute the cost of the conditionally executed recipes, followed by
821   // account for the branching cost, except if the mask is a header mask or
822   // uniform condition.
823   using namespace llvm::VPlanPatternMatch;
824   VPBasicBlock *Then = cast<VPBasicBlock>(getEntry()->getSuccessors()[0]);
825   InstructionCost ThenCost = Then->cost(VF, Ctx);
826 
827   // For the scalar case, we may not always execute the original predicated
828   // block, Thus, scale the block's cost by the probability of executing it.
829   if (VF.isScalar())
830     return ThenCost / getReciprocalPredBlockProb();
831 
832   return ThenCost;
833 }
834 
835 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
836 void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
837                           VPSlotTracker &SlotTracker) const {
838   O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
839   auto NewIndent = Indent + "  ";
840   for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
841     O << '\n';
842     BlockBase->print(O, NewIndent, SlotTracker);
843   }
844   O << Indent << "}\n";
845 
846   printSuccessors(O, Indent);
847 }
848 #endif
849 
850 VPlan::~VPlan() {
851   if (Entry) {
852     VPValue DummyValue;
853     for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
854       Block->dropAllReferences(&DummyValue);
855 
856     VPBlockBase::deleteCFG(Entry);
857 
858     Preheader->dropAllReferences(&DummyValue);
859     delete Preheader;
860   }
861   for (VPValue *VPV : VPLiveInsToFree)
862     delete VPV;
863   if (BackedgeTakenCount)
864     delete BackedgeTakenCount;
865 }
866 
867 VPIRBasicBlock *VPIRBasicBlock::fromBasicBlock(BasicBlock *IRBB) {
868   auto *VPIRBB = new VPIRBasicBlock(IRBB);
869   for (Instruction &I :
870        make_range(IRBB->begin(), IRBB->getTerminator()->getIterator()))
871     VPIRBB->appendRecipe(new VPIRInstruction(I));
872   return VPIRBB;
873 }
874 
875 VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
876                                    PredicatedScalarEvolution &PSE,
877                                    bool RequiresScalarEpilogueCheck,
878                                    bool TailFolded, Loop *TheLoop) {
879   VPIRBasicBlock *Entry =
880       VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader());
881   VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
882   VPIRBasicBlock *ScalarHeader =
883       VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader());
884   auto Plan = std::make_unique<VPlan>(Entry, VecPreheader, ScalarHeader);
885 
886   // Create SCEV and VPValue for the trip count.
887 
888   // Currently only loops with countable exits are vectorized, but calling
889   // getSymbolicMaxBackedgeTakenCount allows enablement work for loops with
890   // uncountable exits whilst also ensuring the symbolic maximum and known
891   // back-edge taken count remain identical for loops with countable exits.
892   const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
893   assert((!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
894           BackedgeTakenCountSCEV == PSE.getBackedgeTakenCount()) &&
895          "Invalid loop count");
896   ScalarEvolution &SE = *PSE.getSE();
897   const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
898                                                        InductionTy, TheLoop);
899   Plan->TripCount =
900       vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE);
901 
902   // Create VPRegionBlock, with empty header and latch blocks, to be filled
903   // during processing later.
904   VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body");
905   VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch");
906   VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB);
907   auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop",
908                                       false /*isReplicator*/);
909 
910   VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader);
911   VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block");
912   VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
913 
914   VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
915   VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader);
916   if (!RequiresScalarEpilogueCheck) {
917     VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
918     return Plan;
919   }
920 
921   // If needed, add a check in the middle block to see if we have completed
922   // all of the iterations in the first vector loop.  Three cases:
923   // 1) If (N - N%VF) == N, then we *don't* need to run the remainder.
924   //    Thus if tail is to be folded, we know we don't need to run the
925   //    remainder and we can set the condition to true.
926   // 2) If we require a scalar epilogue, there is no conditional branch as
927   //    we unconditionally branch to the scalar preheader.  Do nothing.
928   // 3) Otherwise, construct a runtime check.
929   BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock();
930   auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock);
931   // The connection order corresponds to the operands of the conditional branch.
932   VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB);
933   VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
934 
935   auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();
936   // Here we use the same DebugLoc as the scalar loop latch terminator instead
937   // of the corresponding compare because they may have ended up with
938   // different line numbers and we want to avoid awkward line stepping while
939   // debugging. Eg. if the compare has got a line number inside the loop.
940   VPBuilder Builder(MiddleVPBB);
941   VPValue *Cmp =
942       TailFolded
943           ? Plan->getOrAddLiveIn(ConstantInt::getTrue(
944                 IntegerType::getInt1Ty(TripCount->getType()->getContext())))
945           : Builder.createICmp(CmpInst::ICMP_EQ, Plan->getTripCount(),
946                                &Plan->getVectorTripCount(),
947                                ScalarLatchTerm->getDebugLoc(), "cmp.n");
948   Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
949                        ScalarLatchTerm->getDebugLoc());
950   return Plan;
951 }
952 
953 void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
954                              Value *CanonicalIVStartValue,
955                              VPTransformState &State) {
956   Type *TCTy = TripCountV->getType();
957   // Check if the backedge taken count is needed, and if so build it.
958   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
959     IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
960     auto *TCMO = Builder.CreateSub(TripCountV, ConstantInt::get(TCTy, 1),
961                                    "trip.count.minus.1");
962     BackedgeTakenCount->setUnderlyingValue(TCMO);
963   }
964 
965   VectorTripCount.setUnderlyingValue(VectorTripCountV);
966 
967   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
968   // FIXME: Model VF * UF computation completely in VPlan.
969   assert(VFxUF.getNumUsers() && "VFxUF expected to always have users");
970   unsigned UF = getUF();
971   if (VF.getNumUsers()) {
972     Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF);
973     VF.setUnderlyingValue(RuntimeVF);
974     VFxUF.setUnderlyingValue(
975         UF > 1 ? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, UF))
976                : RuntimeVF);
977   } else {
978     VFxUF.setUnderlyingValue(createStepForVF(Builder, TCTy, State.VF, UF));
979   }
980 
981   // When vectorizing the epilogue loop, the canonical induction start value
982   // needs to be changed from zero to the value after the main vector loop.
983   // FIXME: Improve modeling for canonical IV start values in the epilogue loop.
984   if (CanonicalIVStartValue) {
985     VPValue *VPV = getOrAddLiveIn(CanonicalIVStartValue);
986     auto *IV = getCanonicalIV();
987     assert(all_of(IV->users(),
988                   [](const VPUser *U) {
989                     return isa<VPScalarIVStepsRecipe>(U) ||
990                            isa<VPScalarCastRecipe>(U) ||
991                            isa<VPDerivedIVRecipe>(U) ||
992                            cast<VPInstruction>(U)->getOpcode() ==
993                                Instruction::Add;
994                   }) &&
995            "the canonical IV should only be used by its increment or "
996            "ScalarIVSteps when resetting the start value");
997     IV->setOperand(0, VPV);
998   }
999 }
1000 
1001 /// Replace \p VPBB with a VPIRBasicBlock wrapping \p IRBB. All recipes from \p
1002 /// VPBB are moved to the end of the newly created VPIRBasicBlock. VPBB must
1003 /// have a single predecessor, which is rewired to the new VPIRBasicBlock. All
1004 /// successors of VPBB, if any, are rewired to the new VPIRBasicBlock.
1005 static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
1006   VPIRBasicBlock *IRVPBB = VPIRBasicBlock::fromBasicBlock(IRBB);
1007   for (auto &R : make_early_inc_range(*VPBB)) {
1008     assert(!R.isPhi() && "Tried to move phi recipe to end of block");
1009     R.moveBefore(*IRVPBB, IRVPBB->end());
1010   }
1011 
1012   VPBlockUtils::reassociateBlocks(VPBB, IRVPBB);
1013 
1014   delete VPBB;
1015 }
1016 
1017 /// Generate the code inside the preheader and body of the vectorized loop.
1018 /// Assumes a single pre-header basic-block was created for this. Introduce
1019 /// additional basic-blocks as needed, and fill them all.
1020 void VPlan::execute(VPTransformState *State) {
1021   // Initialize CFG state.
1022   State->CFG.PrevVPBB = nullptr;
1023   State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
1024   BasicBlock *VectorPreHeader = State->CFG.PrevBB;
1025   State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
1026 
1027   // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
1028   cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
1029   State->CFG.DTU.applyUpdates(
1030       {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
1031 
1032   // Replace regular VPBB's for the middle and scalar preheader blocks with
1033   // VPIRBasicBlocks wrapping their IR blocks. The IR blocks are created during
1034   // skeleton creation, so we can only create the VPIRBasicBlocks now during
1035   // VPlan execution rather than earlier during VPlan construction.
1036   BasicBlock *MiddleBB = State->CFG.ExitBB;
1037   VPBasicBlock *MiddleVPBB =
1038       cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
1039   BasicBlock *ScalarPh = MiddleBB->getSingleSuccessor();
1040   replaceVPBBWithIRVPBB(getScalarPreheader(), ScalarPh);
1041   replaceVPBBWithIRVPBB(MiddleVPBB, MiddleBB);
1042 
1043   // Disconnect the middle block from its single successor (the scalar loop
1044   // header) in both the CFG and DT. The branch will be recreated during VPlan
1045   // execution.
1046   auto *BrInst = new UnreachableInst(MiddleBB->getContext());
1047   BrInst->insertBefore(MiddleBB->getTerminator());
1048   MiddleBB->getTerminator()->eraseFromParent();
1049   State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
1050   // Disconnect scalar preheader and scalar header, as the dominator tree edge will be updated as part of VPlan execution. This allows keeping the DTU logic generic during VPlan execution.
1051   State->CFG.DTU.applyUpdates(
1052       {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
1053 
1054   // Generate code in the loop pre-header and body.
1055   for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
1056     Block->execute(State);
1057 
1058   VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
1059   BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1060 
1061   // Fix the latch value of canonical, reduction and first-order recurrences
1062   // phis in the vector loop.
1063   VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
1064   for (VPRecipeBase &R : Header->phis()) {
1065     // Skip phi-like recipes that generate their backedege values themselves.
1066     if (isa<VPWidenPHIRecipe>(&R))
1067       continue;
1068 
1069     if (isa<VPWidenPointerInductionRecipe>(&R) ||
1070         isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1071       PHINode *Phi = nullptr;
1072       if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1073         Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
1074       } else {
1075         auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1076         assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1077                "recipe generating only scalars should have been replaced");
1078         auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1079         Phi = cast<PHINode>(GEP->getPointerOperand());
1080       }
1081 
1082       Phi->setIncomingBlock(1, VectorLatchBB);
1083 
1084       // Move the last step to the end of the latch block. This ensures
1085       // consistent placement of all induction updates.
1086       Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1087       Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
1088 
1089       // Use the steps for the last part as backedge value for the induction.
1090       if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1091         Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1092       continue;
1093     }
1094 
1095     auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1096     bool NeedsScalar =
1097         isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1098         (isa<VPReductionPHIRecipe>(PhiR) &&
1099          cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1100     Value *Phi = State->get(PhiR, NeedsScalar);
1101     Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
1102     cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1103   }
1104 
1105   State->CFG.DTU.flush();
1106   assert(State->CFG.DTU.getDomTree().verify(
1107              DominatorTree::VerificationLevel::Fast) &&
1108          "DT not preserved correctly");
1109 }
1110 
1111 InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
1112   // For now only return the cost of the vector loop region, ignoring any other
1113   // blocks, like the preheader or middle blocks.
1114   return getVectorLoopRegion()->cost(VF, Ctx);
1115 }
1116 
1117 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1118 void VPlan::printLiveIns(raw_ostream &O) const {
1119   VPSlotTracker SlotTracker(this);
1120 
1121   if (VF.getNumUsers() > 0) {
1122     O << "\nLive-in ";
1123     VF.printAsOperand(O, SlotTracker);
1124     O << " = VF";
1125   }
1126 
1127   if (VFxUF.getNumUsers() > 0) {
1128     O << "\nLive-in ";
1129     VFxUF.printAsOperand(O, SlotTracker);
1130     O << " = VF * UF";
1131   }
1132 
1133   if (VectorTripCount.getNumUsers() > 0) {
1134     O << "\nLive-in ";
1135     VectorTripCount.printAsOperand(O, SlotTracker);
1136     O << " = vector-trip-count";
1137   }
1138 
1139   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1140     O << "\nLive-in ";
1141     BackedgeTakenCount->printAsOperand(O, SlotTracker);
1142     O << " = backedge-taken count";
1143   }
1144 
1145   O << "\n";
1146   if (TripCount->isLiveIn())
1147     O << "Live-in ";
1148   TripCount->printAsOperand(O, SlotTracker);
1149   O << " = original trip-count";
1150   O << "\n";
1151 }
1152 
1153 LLVM_DUMP_METHOD
1154 void VPlan::print(raw_ostream &O) const {
1155   VPSlotTracker SlotTracker(this);
1156 
1157   O << "VPlan '" << getName() << "' {";
1158 
1159   printLiveIns(O);
1160 
1161   if (!getPreheader()->empty()) {
1162     O << "\n";
1163     getPreheader()->print(O, "", SlotTracker);
1164   }
1165 
1166   for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) {
1167     O << '\n';
1168     Block->print(O, "", SlotTracker);
1169   }
1170 
1171   O << "}\n";
1172 }
1173 
1174 std::string VPlan::getName() const {
1175   std::string Out;
1176   raw_string_ostream RSO(Out);
1177   RSO << Name << " for ";
1178   if (!VFs.empty()) {
1179     RSO << "VF={" << VFs[0];
1180     for (ElementCount VF : drop_begin(VFs))
1181       RSO << "," << VF;
1182     RSO << "},";
1183   }
1184 
1185   if (UFs.empty()) {
1186     RSO << "UF>=1";
1187   } else {
1188     RSO << "UF={" << UFs[0];
1189     for (unsigned UF : drop_begin(UFs))
1190       RSO << "," << UF;
1191     RSO << "}";
1192   }
1193 
1194   return Out;
1195 }
1196 
1197 LLVM_DUMP_METHOD
1198 void VPlan::printDOT(raw_ostream &O) const {
1199   VPlanPrinter Printer(O, *this);
1200   Printer.dump();
1201 }
1202 
1203 LLVM_DUMP_METHOD
1204 void VPlan::dump() const { print(dbgs()); }
1205 #endif
1206 
1207 static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
1208                           DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
1209   // Update the operands of all cloned recipes starting at NewEntry. This
1210   // traverses all reachable blocks. This is done in two steps, to handle cycles
1211   // in PHI recipes.
1212   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
1213       OldDeepRPOT(Entry);
1214   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
1215       NewDeepRPOT(NewEntry);
1216   // First, collect all mappings from old to new VPValues defined by cloned
1217   // recipes.
1218   for (const auto &[OldBB, NewBB] :
1219        zip(VPBlockUtils::blocksOnly<VPBasicBlock>(OldDeepRPOT),
1220            VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT))) {
1221     assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
1222            "blocks must have the same number of recipes");
1223     for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
1224       assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1225              "recipes must have the same number of operands");
1226       assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1227              "recipes must define the same number of operands");
1228       for (const auto &[OldV, NewV] :
1229            zip(OldR.definedValues(), NewR.definedValues()))
1230         Old2NewVPValues[OldV] = NewV;
1231     }
1232   }
1233 
1234   // Update all operands to use cloned VPValues.
1235   for (VPBasicBlock *NewBB :
1236        VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT)) {
1237     for (VPRecipeBase &NewR : *NewBB)
1238       for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1239         VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
1240         NewR.setOperand(I, NewOp);
1241       }
1242   }
1243 }
1244 
1245 VPlan *VPlan::duplicate() {
1246   // Clone blocks.
1247   VPBasicBlock *NewPreheader = Preheader->clone();
1248   const auto &[NewEntry, __] = cloneFrom(Entry);
1249 
1250   BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
1251   VPIRBasicBlock *NewScalarHeader = cast<VPIRBasicBlock>(*find_if(
1252       vp_depth_first_shallow(NewEntry), [ScalarHeaderIRBB](VPBlockBase *VPB) {
1253         auto *VPIRBB = dyn_cast<VPIRBasicBlock>(VPB);
1254         return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
1255       }));
1256   // Create VPlan, clone live-ins and remap operands in the cloned blocks.
1257   auto *NewPlan =
1258       new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry), NewScalarHeader);
1259   DenseMap<VPValue *, VPValue *> Old2NewVPValues;
1260   for (VPValue *OldLiveIn : VPLiveInsToFree) {
1261     Old2NewVPValues[OldLiveIn] =
1262         NewPlan->getOrAddLiveIn(OldLiveIn->getLiveInIRValue());
1263   }
1264   Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
1265   Old2NewVPValues[&VF] = &NewPlan->VF;
1266   Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
1267   if (BackedgeTakenCount) {
1268     NewPlan->BackedgeTakenCount = new VPValue();
1269     Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
1270   }
1271   assert(TripCount && "trip count must be set");
1272   if (TripCount->isLiveIn())
1273     Old2NewVPValues[TripCount] =
1274         NewPlan->getOrAddLiveIn(TripCount->getLiveInIRValue());
1275   // else NewTripCount will be created and inserted into Old2NewVPValues when
1276   // TripCount is cloned. In any case NewPlan->TripCount is updated below.
1277 
1278   remapOperands(Preheader, NewPreheader, Old2NewVPValues);
1279   remapOperands(Entry, NewEntry, Old2NewVPValues);
1280 
1281   // Initialize remaining fields of cloned VPlan.
1282   NewPlan->VFs = VFs;
1283   NewPlan->UFs = UFs;
1284   // TODO: Adjust names.
1285   NewPlan->Name = Name;
1286   assert(Old2NewVPValues.contains(TripCount) &&
1287          "TripCount must have been added to Old2NewVPValues");
1288   NewPlan->TripCount = Old2NewVPValues[TripCount];
1289   return NewPlan;
1290 }
1291 
1292 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1293 
1294 Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
1295   return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
1296          Twine(getOrCreateBID(Block));
1297 }
1298 
1299 Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
1300   const std::string &Name = Block->getName();
1301   if (!Name.empty())
1302     return Name;
1303   return "VPB" + Twine(getOrCreateBID(Block));
1304 }
1305 
1306 void VPlanPrinter::dump() {
1307   Depth = 1;
1308   bumpIndent(0);
1309   OS << "digraph VPlan {\n";
1310   OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1311   if (!Plan.getName().empty())
1312     OS << "\\n" << DOT::EscapeString(Plan.getName());
1313 
1314   {
1315     // Print live-ins.
1316   std::string Str;
1317   raw_string_ostream SS(Str);
1318   Plan.printLiveIns(SS);
1319   SmallVector<StringRef, 0> Lines;
1320   StringRef(Str).rtrim('\n').split(Lines, "\n");
1321   for (auto Line : Lines)
1322     OS << DOT::EscapeString(Line.str()) << "\\n";
1323   }
1324 
1325   OS << "\"]\n";
1326   OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1327   OS << "edge [fontname=Courier, fontsize=30]\n";
1328   OS << "compound=true\n";
1329 
1330   dumpBlock(Plan.getPreheader());
1331 
1332   for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
1333     dumpBlock(Block);
1334 
1335   OS << "}\n";
1336 }
1337 
1338 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
1339   if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
1340     dumpBasicBlock(BasicBlock);
1341   else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1342     dumpRegion(Region);
1343   else
1344     llvm_unreachable("Unsupported kind of VPBlock.");
1345 }
1346 
1347 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
1348                             bool Hidden, const Twine &Label) {
1349   // Due to "dot" we print an edge between two regions as an edge between the
1350   // exiting basic block and the entry basic of the respective regions.
1351   const VPBlockBase *Tail = From->getExitingBasicBlock();
1352   const VPBlockBase *Head = To->getEntryBasicBlock();
1353   OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1354   OS << " [ label=\"" << Label << '\"';
1355   if (Tail != From)
1356     OS << " ltail=" << getUID(From);
1357   if (Head != To)
1358     OS << " lhead=" << getUID(To);
1359   if (Hidden)
1360     OS << "; splines=none";
1361   OS << "]\n";
1362 }
1363 
1364 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
1365   auto &Successors = Block->getSuccessors();
1366   if (Successors.size() == 1)
1367     drawEdge(Block, Successors.front(), false, "");
1368   else if (Successors.size() == 2) {
1369     drawEdge(Block, Successors.front(), false, "T");
1370     drawEdge(Block, Successors.back(), false, "F");
1371   } else {
1372     unsigned SuccessorNumber = 0;
1373     for (auto *Successor : Successors)
1374       drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
1375   }
1376 }
1377 
1378 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
1379   // Implement dot-formatted dump by performing plain-text dump into the
1380   // temporary storage followed by some post-processing.
1381   OS << Indent << getUID(BasicBlock) << " [label =\n";
1382   bumpIndent(1);
1383   std::string Str;
1384   raw_string_ostream SS(Str);
1385   // Use no indentation as we need to wrap the lines into quotes ourselves.
1386   BasicBlock->print(SS, "", SlotTracker);
1387 
1388   // We need to process each line of the output separately, so split
1389   // single-string plain-text dump.
1390   SmallVector<StringRef, 0> Lines;
1391   StringRef(Str).rtrim('\n').split(Lines, "\n");
1392 
1393   auto EmitLine = [&](StringRef Line, StringRef Suffix) {
1394     OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
1395   };
1396 
1397   // Don't need the "+" after the last line.
1398   for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
1399     EmitLine(Line, " +\n");
1400   EmitLine(Lines.back(), "\n");
1401 
1402   bumpIndent(-1);
1403   OS << Indent << "]\n";
1404 
1405   dumpEdges(BasicBlock);
1406 }
1407 
1408 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
1409   OS << Indent << "subgraph " << getUID(Region) << " {\n";
1410   bumpIndent(1);
1411   OS << Indent << "fontname=Courier\n"
1412      << Indent << "label=\""
1413      << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
1414      << DOT::EscapeString(Region->getName()) << "\"\n";
1415   // Dump the blocks of the region.
1416   assert(Region->getEntry() && "Region contains no inner blocks.");
1417   for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry()))
1418     dumpBlock(Block);
1419   bumpIndent(-1);
1420   OS << Indent << "}\n";
1421   dumpEdges(Region);
1422 }
1423 
1424 void VPlanIngredient::print(raw_ostream &O) const {
1425   if (auto *Inst = dyn_cast<Instruction>(V)) {
1426     if (!Inst->getType()->isVoidTy()) {
1427       Inst->printAsOperand(O, false);
1428       O << " = ";
1429     }
1430     O << Inst->getOpcodeName() << " ";
1431     unsigned E = Inst->getNumOperands();
1432     if (E > 0) {
1433       Inst->getOperand(0)->printAsOperand(O, false);
1434       for (unsigned I = 1; I < E; ++I)
1435         Inst->getOperand(I)->printAsOperand(O << ", ", false);
1436     }
1437   } else // !Inst
1438     V->printAsOperand(O, false);
1439 }
1440 
1441 #endif
1442 
1443 bool VPValue::isDefinedOutsideLoopRegions() const {
1444   return !hasDefiningRecipe() ||
1445          !getDefiningRecipe()->getParent()->getEnclosingLoopRegion();
1446 }
1447 
1448 void VPValue::replaceAllUsesWith(VPValue *New) {
1449   replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
1450 }
1451 
1452 void VPValue::replaceUsesWithIf(
1453     VPValue *New,
1454     llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) {
1455   // Note that this early exit is required for correctness; the implementation
1456   // below relies on the number of users for this VPValue to decrease, which
1457   // isn't the case if this == New.
1458   if (this == New)
1459     return;
1460 
1461   for (unsigned J = 0; J < getNumUsers();) {
1462     VPUser *User = Users[J];
1463     bool RemovedUser = false;
1464     for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
1465       if (User->getOperand(I) != this || !ShouldReplace(*User, I))
1466         continue;
1467 
1468       RemovedUser = true;
1469       User->setOperand(I, New);
1470     }
1471     // If a user got removed after updating the current user, the next user to
1472     // update will be moved to the current position, so we only need to
1473     // increment the index if the number of users did not change.
1474     if (!RemovedUser)
1475       J++;
1476   }
1477 }
1478 
1479 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1480 void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
1481   OS << Tracker.getOrCreateName(this);
1482 }
1483 
1484 void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
1485   interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1486     Op->printAsOperand(O, SlotTracker);
1487   });
1488 }
1489 #endif
1490 
1491 void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
1492                                           Old2NewTy &Old2New,
1493                                           InterleavedAccessInfo &IAI) {
1494   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
1495       RPOT(Region->getEntry());
1496   for (VPBlockBase *Base : RPOT) {
1497     visitBlock(Base, Old2New, IAI);
1498   }
1499 }
1500 
1501 void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1502                                          InterleavedAccessInfo &IAI) {
1503   if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
1504     for (VPRecipeBase &VPI : *VPBB) {
1505       if (isa<VPWidenPHIRecipe>(&VPI))
1506         continue;
1507       assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
1508       auto *VPInst = cast<VPInstruction>(&VPI);
1509 
1510       auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
1511       if (!Inst)
1512         continue;
1513       auto *IG = IAI.getInterleaveGroup(Inst);
1514       if (!IG)
1515         continue;
1516 
1517       auto NewIGIter = Old2New.find(IG);
1518       if (NewIGIter == Old2New.end())
1519         Old2New[IG] = new InterleaveGroup<VPInstruction>(
1520             IG->getFactor(), IG->isReverse(), IG->getAlign());
1521 
1522       if (Inst == IG->getInsertPos())
1523         Old2New[IG]->setInsertPos(VPInst);
1524 
1525       InterleaveGroupMap[VPInst] = Old2New[IG];
1526       InterleaveGroupMap[VPInst]->insertMember(
1527           VPInst, IG->getIndex(Inst),
1528           Align(IG->isReverse() ? (-1) * int(IG->getFactor())
1529                                 : IG->getFactor()));
1530     }
1531   } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1532     visitRegion(Region, Old2New, IAI);
1533   else
1534     llvm_unreachable("Unsupported kind of VPBlock.");
1535 }
1536 
1537 VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
1538                                                  InterleavedAccessInfo &IAI) {
1539   Old2NewTy Old2New;
1540   visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
1541 }
1542 
1543 void VPSlotTracker::assignName(const VPValue *V) {
1544   assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
1545   auto *UV = V->getUnderlyingValue();
1546   auto *VPI = dyn_cast_or_null<VPInstruction>(V->getDefiningRecipe());
1547   if (!UV && !(VPI && !VPI->getName().empty())) {
1548     VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str();
1549     NextSlot++;
1550     return;
1551   }
1552 
1553   // Use the name of the underlying Value, wrapped in "ir<>", and versioned by
1554   // appending ".Number" to the name if there are multiple uses.
1555   std::string Name;
1556   if (UV) {
1557     raw_string_ostream S(Name);
1558     UV->printAsOperand(S, false);
1559   } else
1560     Name = VPI->getName();
1561 
1562   assert(!Name.empty() && "Name cannot be empty.");
1563   StringRef Prefix = UV ? "ir<" : "vp<%";
1564   std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str();
1565 
1566   // First assign the base name for V.
1567   const auto &[A, _] = VPValue2Name.insert({V, BaseName});
1568   // Integer or FP constants with different types will result in he same string
1569   // due to stripping types.
1570   if (V->isLiveIn() && isa<ConstantInt, ConstantFP>(UV))
1571     return;
1572 
1573   // If it is already used by C > 0 other VPValues, increase the version counter
1574   // C and use it for V.
1575   const auto &[C, UseInserted] = BaseName2Version.insert({BaseName, 0});
1576   if (!UseInserted) {
1577     C->second++;
1578     A->second = (BaseName + Twine(".") + Twine(C->second)).str();
1579   }
1580 }
1581 
1582 void VPSlotTracker::assignNames(const VPlan &Plan) {
1583   if (Plan.VF.getNumUsers() > 0)
1584     assignName(&Plan.VF);
1585   if (Plan.VFxUF.getNumUsers() > 0)
1586     assignName(&Plan.VFxUF);
1587   assignName(&Plan.VectorTripCount);
1588   if (Plan.BackedgeTakenCount)
1589     assignName(Plan.BackedgeTakenCount);
1590   for (VPValue *LI : Plan.VPLiveInsToFree)
1591     assignName(LI);
1592   assignNames(Plan.getPreheader());
1593 
1594   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1595       RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
1596   for (const VPBasicBlock *VPBB :
1597        VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
1598     assignNames(VPBB);
1599 }
1600 
1601 void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
1602   for (const VPRecipeBase &Recipe : *VPBB)
1603     for (VPValue *Def : Recipe.definedValues())
1604       assignName(Def);
1605 }
1606 
1607 std::string VPSlotTracker::getOrCreateName(const VPValue *V) const {
1608   std::string Name = VPValue2Name.lookup(V);
1609   if (!Name.empty())
1610     return Name;
1611 
1612   // If no name was assigned, no VPlan was provided when creating the slot
1613   // tracker or it is not reachable from the provided VPlan. This can happen,
1614   // e.g. when trying to print a recipe that has not been inserted into a VPlan
1615   // in a debugger.
1616   // TODO: Update VPSlotTracker constructor to assign names to recipes &
1617   // VPValues not associated with a VPlan, instead of constructing names ad-hoc
1618   // here.
1619   const VPRecipeBase *DefR = V->getDefiningRecipe();
1620   (void)DefR;
1621   assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) &&
1622          "VPValue defined by a recipe in a VPlan?");
1623 
1624   // Use the underlying value's name, if there is one.
1625   if (auto *UV = V->getUnderlyingValue()) {
1626     std::string Name;
1627     raw_string_ostream S(Name);
1628     UV->printAsOperand(S, false);
1629     return (Twine("ir<") + Name + ">").str();
1630   }
1631 
1632   return "<badref>";
1633 }
1634 
1635 bool LoopVectorizationPlanner::getDecisionAndClampRange(
1636     const std::function<bool(ElementCount)> &Predicate, VFRange &Range) {
1637   assert(!Range.isEmpty() && "Trying to test an empty VF range.");
1638   bool PredicateAtRangeStart = Predicate(Range.Start);
1639 
1640   for (ElementCount TmpVF : VFRange(Range.Start * 2, Range.End))
1641     if (Predicate(TmpVF) != PredicateAtRangeStart) {
1642       Range.End = TmpVF;
1643       break;
1644     }
1645 
1646   return PredicateAtRangeStart;
1647 }
1648 
1649 /// Build VPlans for the full range of feasible VF's = {\p MinVF, 2 * \p MinVF,
1650 /// 4 * \p MinVF, ..., \p MaxVF} by repeatedly building a VPlan for a sub-range
1651 /// of VF's starting at a given VF and extending it as much as possible. Each
1652 /// vectorization decision can potentially shorten this sub-range during
1653 /// buildVPlan().
1654 void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
1655                                            ElementCount MaxVF) {
1656   auto MaxVFTimes2 = MaxVF * 2;
1657   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
1658     VFRange SubRange = {VF, MaxVFTimes2};
1659     auto Plan = buildVPlan(SubRange);
1660     VPlanTransforms::optimize(*Plan);
1661     VPlans.push_back(std::move(Plan));
1662     VF = SubRange.End;
1663   }
1664 }
1665 
1666 VPlan &LoopVectorizationPlanner::getPlanFor(ElementCount VF) const {
1667   assert(count_if(VPlans,
1668                   [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
1669              1 &&
1670          "Multiple VPlans for VF.");
1671 
1672   for (const VPlanPtr &Plan : VPlans) {
1673     if (Plan->hasVF(VF))
1674       return *Plan.get();
1675   }
1676   llvm_unreachable("No plan found!");
1677 }
1678 
1679 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1680 void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
1681   if (VPlans.empty()) {
1682     O << "LV: No VPlans built.\n";
1683     return;
1684   }
1685   for (const auto &Plan : VPlans)
1686     if (PrintVPlansInDotFormat)
1687       Plan->printDOT(O);
1688     else
1689       Plan->print(O);
1690 }
1691 #endif
1692 
1693 TargetTransformInfo::OperandValueInfo
1694 VPCostContext::getOperandInfo(VPValue *V) const {
1695   if (!V->isLiveIn())
1696     return {};
1697 
1698   return TTI::getOperandInfo(V->getLiveInIRValue());
1699 }
1700