xref: /llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp (revision f76dae15862959deb62ec200e0958d532c908f30)
1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This is the LLVM vectorization plan. It represents a candidate for
11 /// vectorization, allowing to plan and optimize how to vectorize a given loop
12 /// before generating LLVM-IR.
13 /// The vectorizer uses vectorization plans to estimate the costs of potential
14 /// candidates and if profitable to execute the desired plan, generating vector
15 /// LLVM-IR code.
16 ///
17 //===----------------------------------------------------------------------===//
18 
19 #include "VPlan.h"
20 #include "LoopVectorizationPlanner.h"
21 #include "VPlanCFG.h"
22 #include "VPlanDominatorTree.h"
23 #include "VPlanPatternMatch.h"
24 #include "VPlanTransforms.h"
25 #include "VPlanUtils.h"
26 #include "llvm/ADT/PostOrderIterator.h"
27 #include "llvm/ADT/STLExtras.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/ADT/Twine.h"
31 #include "llvm/Analysis/DomTreeUpdater.h"
32 #include "llvm/Analysis/LoopInfo.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/CFG.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/Instruction.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/IR/Value.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/CommandLine.h"
42 #include "llvm/Support/Debug.h"
43 #include "llvm/Support/GraphWriter.h"
44 #include "llvm/Support/raw_ostream.h"
45 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
46 #include "llvm/Transforms/Utils/LoopVersioning.h"
47 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
48 #include <cassert>
49 #include <string>
50 #include <vector>
51 
52 using namespace llvm;
53 using namespace llvm::VPlanPatternMatch;
54 
55 namespace llvm {
56 extern cl::opt<bool> EnableVPlanNativePath;
57 }
58 extern cl::opt<unsigned> ForceTargetInstructionCost;
59 
60 static cl::opt<bool> PrintVPlansInDotFormat(
61     "vplan-print-in-dot-format", cl::Hidden,
62     cl::desc("Use dot format instead of plain text when dumping VPlans"));
63 
64 #define DEBUG_TYPE "vplan"
65 
66 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
67 raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
68   const VPInstruction *Instr = dyn_cast<VPInstruction>(&V);
69   VPSlotTracker SlotTracker(
70       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
71   V.print(OS, SlotTracker);
72   return OS;
73 }
74 #endif
75 
76 Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder,
77                                 const ElementCount &VF) const {
78   switch (LaneKind) {
79   case VPLane::Kind::ScalableLast:
80     // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
81     return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
82                              Builder.getInt32(VF.getKnownMinValue() - Lane));
83   case VPLane::Kind::First:
84     return Builder.getInt32(Lane);
85   }
86   llvm_unreachable("Unknown lane kind");
87 }
88 
89 VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
90     : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
91   if (Def)
92     Def->addDefinedValue(this);
93 }
94 
95 VPValue::~VPValue() {
96   assert(Users.empty() && "trying to delete a VPValue with remaining users");
97   if (Def)
98     Def->removeDefinedValue(this);
99 }
100 
101 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
102 void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
103   if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
104     R->print(OS, "", SlotTracker);
105   else
106     printAsOperand(OS, SlotTracker);
107 }
108 
109 void VPValue::dump() const {
110   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
111   VPSlotTracker SlotTracker(
112       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
113   print(dbgs(), SlotTracker);
114   dbgs() << "\n";
115 }
116 
117 void VPDef::dump() const {
118   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
119   VPSlotTracker SlotTracker(
120       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
121   print(dbgs(), "", SlotTracker);
122   dbgs() << "\n";
123 }
124 #endif
125 
126 VPRecipeBase *VPValue::getDefiningRecipe() {
127   return cast_or_null<VPRecipeBase>(Def);
128 }
129 
130 const VPRecipeBase *VPValue::getDefiningRecipe() const {
131   return cast_or_null<VPRecipeBase>(Def);
132 }
133 
134 // Get the top-most entry block of \p Start. This is the entry block of the
135 // containing VPlan. This function is templated to support both const and non-const blocks
136 template <typename T> static T *getPlanEntry(T *Start) {
137   T *Next = Start;
138   T *Current = Start;
139   while ((Next = Next->getParent()))
140     Current = Next;
141 
142   SmallSetVector<T *, 8> WorkList;
143   WorkList.insert(Current);
144 
145   for (unsigned i = 0; i < WorkList.size(); i++) {
146     T *Current = WorkList[i];
147     if (Current->getNumPredecessors() == 0)
148       return Current;
149     auto &Predecessors = Current->getPredecessors();
150     WorkList.insert(Predecessors.begin(), Predecessors.end());
151   }
152 
153   llvm_unreachable("VPlan without any entry node without predecessors");
154 }
155 
156 VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
157 
158 const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
159 
160 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
161 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
162   const VPBlockBase *Block = this;
163   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
164     Block = Region->getEntry();
165   return cast<VPBasicBlock>(Block);
166 }
167 
168 VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
169   VPBlockBase *Block = this;
170   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
171     Block = Region->getEntry();
172   return cast<VPBasicBlock>(Block);
173 }
174 
175 void VPBlockBase::setPlan(VPlan *ParentPlan) {
176   assert(
177       (ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) &&
178       "Can only set plan on its entry or preheader block.");
179   Plan = ParentPlan;
180 }
181 
182 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
183 const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const {
184   const VPBlockBase *Block = this;
185   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
186     Block = Region->getExiting();
187   return cast<VPBasicBlock>(Block);
188 }
189 
190 VPBasicBlock *VPBlockBase::getExitingBasicBlock() {
191   VPBlockBase *Block = this;
192   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
193     Block = Region->getExiting();
194   return cast<VPBasicBlock>(Block);
195 }
196 
197 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
198   if (!Successors.empty() || !Parent)
199     return this;
200   assert(Parent->getExiting() == this &&
201          "Block w/o successors not the exiting block of its parent.");
202   return Parent->getEnclosingBlockWithSuccessors();
203 }
204 
205 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
206   if (!Predecessors.empty() || !Parent)
207     return this;
208   assert(Parent->getEntry() == this &&
209          "Block w/o predecessors not the entry of its parent.");
210   return Parent->getEnclosingBlockWithPredecessors();
211 }
212 
213 void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
214   for (VPBlockBase *Block : to_vector(vp_depth_first_shallow(Entry)))
215     delete Block;
216 }
217 
218 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
219   iterator It = begin();
220   while (It != end() && It->isPhi())
221     It++;
222   return It;
223 }
224 
225 VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
226                                    DominatorTree *DT, IRBuilderBase &Builder,
227                                    InnerLoopVectorizer *ILV, VPlan *Plan)
228     : VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
229       LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType()) {}
230 
231 Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
232   if (Def->isLiveIn())
233     return Def->getLiveInIRValue();
234 
235   if (hasScalarValue(Def, Instance)) {
236     return Data.VPV2Scalars[Def][Instance.Lane.mapToCacheIndex(VF)];
237   }
238   if (!Instance.Lane.isFirstLane() &&
239       vputils::isUniformAfterVectorization(Def) &&
240       hasScalarValue(Def, {Instance.Part, VPLane::getFirstLane()})) {
241     return Data.VPV2Scalars[Def][0];
242   }
243 
244   assert(hasVectorValue(Def));
245   auto *VecPart = Data.VPV2Vector[Def];
246   if (!VecPart->getType()->isVectorTy()) {
247     assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
248     return VecPart;
249   }
250   // TODO: Cache created scalar values.
251   Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
252   auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
253   // set(Def, Extract, Instance);
254   return Extract;
255 }
256 
257 Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
258   if (NeedsScalar) {
259     assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def) ||
260             !vputils::onlyFirstLaneUsed(Def) ||
261             (hasScalarValue(Def, VPIteration(0, 0)) &&
262              Data.VPV2Scalars[Def].size() == 1)) &&
263            "Trying to access a single scalar per part but has multiple scalars "
264            "per part.");
265     return get(Def, VPIteration(0, 0));
266   }
267 
268   // If Values have been set for this Def return the one relevant for \p Part.
269   if (hasVectorValue(Def))
270     return Data.VPV2Vector[Def];
271 
272   auto GetBroadcastInstrs = [this, Def](Value *V) {
273     bool SafeToHoist = Def->isDefinedOutsideLoopRegions();
274     if (VF.isScalar())
275       return V;
276     // Place the code for broadcasting invariant variables in the new preheader.
277     IRBuilder<>::InsertPointGuard Guard(Builder);
278     if (SafeToHoist) {
279       BasicBlock *LoopVectorPreHeader = CFG.VPBB2IRBB[cast<VPBasicBlock>(
280           Plan->getVectorLoopRegion()->getSinglePredecessor())];
281       if (LoopVectorPreHeader)
282         Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
283     }
284 
285     // Place the code for broadcasting invariant variables in the new preheader.
286     // Broadcast the scalar into all locations in the vector.
287     Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
288 
289     return Shuf;
290   };
291 
292   if (!hasScalarValue(Def, {0, 0})) {
293     assert(Def->isLiveIn() && "expected a live-in");
294     Value *IRV = Def->getLiveInIRValue();
295     Value *B = GetBroadcastInstrs(IRV);
296     set(Def, B);
297     return B;
298   }
299 
300   Value *ScalarValue = get(Def, {0, 0});
301   // If we aren't vectorizing, we can just copy the scalar map values over
302   // to the vector map.
303   if (VF.isScalar()) {
304     set(Def, ScalarValue);
305     return ScalarValue;
306   }
307 
308   bool IsUniform = vputils::isUniformAfterVectorization(Def);
309 
310   unsigned LastLane = IsUniform ? 0 : VF.getKnownMinValue() - 1;
311   // Check if there is a scalar value for the selected lane.
312   if (!hasScalarValue(Def, {0, LastLane})) {
313     // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
314     // VPExpandSCEVRecipes can also be uniform.
315     assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDefiningRecipe()) ||
316             isa<VPScalarIVStepsRecipe>(Def->getDefiningRecipe()) ||
317             isa<VPExpandSCEVRecipe>(Def->getDefiningRecipe())) &&
318            "unexpected recipe found to be invariant");
319     IsUniform = true;
320     LastLane = 0;
321   }
322 
323   auto *LastInst = cast<Instruction>(get(Def, {0, LastLane}));
324   // Set the insert point after the last scalarized instruction or after the
325   // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
326   // will directly follow the scalar definitions.
327   auto OldIP = Builder.saveIP();
328   auto NewIP =
329       isa<PHINode>(LastInst)
330           ? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
331           : std::next(BasicBlock::iterator(LastInst));
332   Builder.SetInsertPoint(&*NewIP);
333 
334   // However, if we are vectorizing, we need to construct the vector values.
335   // If the value is known to be uniform after vectorization, we can just
336   // broadcast the scalar value corresponding to lane zero for each unroll
337   // iteration. Otherwise, we construct the vector values using
338   // insertelement instructions. Since the resulting vectors are stored in
339   // State, we will only generate the insertelements once.
340   Value *VectorValue = nullptr;
341   if (IsUniform) {
342     VectorValue = GetBroadcastInstrs(ScalarValue);
343     set(Def, VectorValue);
344   } else {
345     // Initialize packing with insertelements to start from undef.
346     assert(!VF.isScalable() && "VF is assumed to be non scalable.");
347     Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF));
348     set(Def, Undef);
349     for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
350       packScalarIntoVectorValue(Def, {0, Lane});
351     VectorValue = get(Def);
352   }
353   Builder.restoreIP(OldIP);
354   return VectorValue;
355 }
356 
357 BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
358   VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
359   return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
360 }
361 
362 void VPTransformState::addNewMetadata(Instruction *To,
363                                       const Instruction *Orig) {
364   // If the loop was versioned with memchecks, add the corresponding no-alias
365   // metadata.
366   if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig)))
367     LVer->annotateInstWithNoAlias(To, Orig);
368 }
369 
370 void VPTransformState::addMetadata(Value *To, Instruction *From) {
371   // No source instruction to transfer metadata from?
372   if (!From)
373     return;
374 
375   if (Instruction *ToI = dyn_cast<Instruction>(To)) {
376     propagateMetadata(ToI, From);
377     addNewMetadata(ToI, From);
378   }
379 }
380 
381 void VPTransformState::setDebugLocFrom(DebugLoc DL) {
382   const DILocation *DIL = DL;
383   // When a FSDiscriminator is enabled, we don't need to add the multiply
384   // factors to the discriminators.
385   if (DIL &&
386       Builder.GetInsertBlock()
387           ->getParent()
388           ->shouldEmitDebugInfoForProfiling() &&
389       !EnableFSDiscriminator) {
390     // FIXME: For scalable vectors, assume vscale=1.
391     unsigned UF = Plan->getUF();
392     auto NewDIL =
393         DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
394     if (NewDIL)
395       Builder.SetCurrentDebugLocation(*NewDIL);
396     else
397       LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
398                         << DIL->getFilename() << " Line: " << DIL->getLine());
399   } else
400     Builder.SetCurrentDebugLocation(DIL);
401 }
402 
403 void VPTransformState::packScalarIntoVectorValue(VPValue *Def,
404                                                  const VPIteration &Instance) {
405   Value *ScalarInst = get(Def, Instance);
406   Value *VectorValue = get(Def);
407   VectorValue = Builder.CreateInsertElement(
408       VectorValue, ScalarInst, Instance.Lane.getAsRuntimeExpr(Builder, VF));
409   set(Def, VectorValue);
410 }
411 
412 BasicBlock *
413 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
414   // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
415   // Pred stands for Predessor. Prev stands for Previous - last visited/created.
416   BasicBlock *PrevBB = CFG.PrevBB;
417   BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
418                                          PrevBB->getParent(), CFG.ExitBB);
419   LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
420 
421   // Hook up the new basic block to its predecessors.
422   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
423     VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
424     auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
425     BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
426 
427     assert(PredBB && "Predecessor basic-block not found building successor.");
428     auto *PredBBTerminator = PredBB->getTerminator();
429     LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
430 
431     auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
432     if (isa<UnreachableInst>(PredBBTerminator)) {
433       assert(PredVPSuccessors.size() == 1 &&
434              "Predecessor ending w/o branch must have single successor.");
435       DebugLoc DL = PredBBTerminator->getDebugLoc();
436       PredBBTerminator->eraseFromParent();
437       auto *Br = BranchInst::Create(NewBB, PredBB);
438       Br->setDebugLoc(DL);
439     } else if (TermBr && !TermBr->isConditional()) {
440       TermBr->setSuccessor(0, NewBB);
441     } else {
442       // Set each forward successor here when it is created, excluding
443       // backedges. A backward successor is set when the branch is created.
444       unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
445       assert(!TermBr->getSuccessor(idx) &&
446              "Trying to reset an existing successor block.");
447       TermBr->setSuccessor(idx, NewBB);
448     }
449     CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}});
450   }
451   return NewBB;
452 }
453 
454 void VPIRBasicBlock::execute(VPTransformState *State) {
455   assert(getHierarchicalSuccessors().size() <= 2 &&
456          "VPIRBasicBlock can have at most two successors at the moment!");
457   State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
458   executeRecipes(State, getIRBasicBlock());
459   if (getSingleSuccessor()) {
460     assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
461     auto *Br = State->Builder.CreateBr(getIRBasicBlock());
462     Br->setOperand(0, nullptr);
463     getIRBasicBlock()->getTerminator()->eraseFromParent();
464   }
465 
466   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
467     VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
468     BasicBlock *PredBB = State->CFG.VPBB2IRBB[PredVPBB];
469     assert(PredBB && "Predecessor basic-block not found building successor.");
470     LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
471 
472     auto *PredBBTerminator = PredBB->getTerminator();
473     auto *TermBr = cast<BranchInst>(PredBBTerminator);
474     // Set each forward successor here when it is created, excluding
475     // backedges. A backward successor is set when the branch is created.
476     const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
477     unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
478     assert(!TermBr->getSuccessor(idx) &&
479            "Trying to reset an existing successor block.");
480     TermBr->setSuccessor(idx, IRBB);
481     State->CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, IRBB}});
482   }
483 }
484 
485 void VPBasicBlock::execute(VPTransformState *State) {
486   bool Replica = State->Instance && !State->Instance->isFirstIteration();
487   VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
488   VPBlockBase *SingleHPred = nullptr;
489   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
490 
491   auto IsLoopRegion = [](VPBlockBase *BB) {
492     auto *R = dyn_cast<VPRegionBlock>(BB);
493     return R && !R->isReplicator();
494   };
495 
496   // 1. Create an IR basic block.
497   if (PrevVPBB && /* A */
498       !((SingleHPred = getSingleHierarchicalPredecessor()) &&
499         SingleHPred->getExitingBasicBlock() == PrevVPBB &&
500         PrevVPBB->getSingleHierarchicalSuccessor() &&
501         (SingleHPred->getParent() == getEnclosingLoopRegion() &&
502          !IsLoopRegion(SingleHPred))) &&         /* B */
503       !(Replica && getPredecessors().empty())) { /* C */
504     // The last IR basic block is reused, as an optimization, in three cases:
505     // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
506     // B. when the current VPBB has a single (hierarchical) predecessor which
507     //    is PrevVPBB and the latter has a single (hierarchical) successor which
508     //    both are in the same non-replicator region; and
509     // C. when the current VPBB is an entry of a region replica - where PrevVPBB
510     //    is the exiting VPBB of this region from a previous instance, or the
511     //    predecessor of this region.
512 
513     NewBB = createEmptyBasicBlock(State->CFG);
514     State->Builder.SetInsertPoint(NewBB);
515     // Temporarily terminate with unreachable until CFG is rewired.
516     UnreachableInst *Terminator = State->Builder.CreateUnreachable();
517     // Register NewBB in its loop. In innermost loops its the same for all
518     // BB's.
519     if (State->CurrentVectorLoop)
520       State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
521     State->Builder.SetInsertPoint(Terminator);
522     State->CFG.PrevBB = NewBB;
523   }
524 
525   // 2. Fill the IR basic block with IR instructions.
526   executeRecipes(State, NewBB);
527 }
528 
529 void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
530   for (VPRecipeBase &R : Recipes) {
531     for (auto *Def : R.definedValues())
532       Def->replaceAllUsesWith(NewValue);
533 
534     for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
535       R.setOperand(I, NewValue);
536   }
537 }
538 
539 void VPBasicBlock::executeRecipes(VPTransformState *State, BasicBlock *BB) {
540   LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
541                     << " in BB:" << BB->getName() << '\n');
542 
543   State->CFG.VPBB2IRBB[this] = BB;
544   State->CFG.PrevVPBB = this;
545 
546   for (VPRecipeBase &Recipe : Recipes)
547     Recipe.execute(*State);
548 
549   LLVM_DEBUG(dbgs() << "LV: filled BB:" << *BB);
550 }
551 
552 VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
553   assert((SplitAt == end() || SplitAt->getParent() == this) &&
554          "can only split at a position in the same block");
555 
556   SmallVector<VPBlockBase *, 2> Succs(successors());
557   // First, disconnect the current block from its successors.
558   for (VPBlockBase *Succ : Succs)
559     VPBlockUtils::disconnectBlocks(this, Succ);
560 
561   // Create new empty block after the block to split.
562   auto *SplitBlock = new VPBasicBlock(getName() + ".split");
563   VPBlockUtils::insertBlockAfter(SplitBlock, this);
564 
565   // Add successors for block to split to new block.
566   for (VPBlockBase *Succ : Succs)
567     VPBlockUtils::connectBlocks(SplitBlock, Succ);
568 
569   // Finally, move the recipes starting at SplitAt to new block.
570   for (VPRecipeBase &ToMove :
571        make_early_inc_range(make_range(SplitAt, this->end())))
572     ToMove.moveBefore(*SplitBlock, SplitBlock->end());
573 
574   return SplitBlock;
575 }
576 
577 /// Return the enclosing loop region for region \p P. The templated version is
578 /// used to support both const and non-const block arguments.
579 template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) {
580   if (P && P->isReplicator()) {
581     P = P->getParent();
582     assert(!cast<VPRegionBlock>(P)->isReplicator() &&
583            "unexpected nested replicate regions");
584   }
585   return P;
586 }
587 
588 VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
589   return getEnclosingLoopRegionForRegion(getParent());
590 }
591 
592 const VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() const {
593   return getEnclosingLoopRegionForRegion(getParent());
594 }
595 
596 static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
597   if (VPBB->empty()) {
598     assert(
599         VPBB->getNumSuccessors() < 2 &&
600         "block with multiple successors doesn't have a recipe as terminator");
601     return false;
602   }
603 
604   const VPRecipeBase *R = &VPBB->back();
605   bool IsCondBranch = isa<VPBranchOnMaskRecipe>(R) ||
606                       match(R, m_BranchOnCond(m_VPValue())) ||
607                       match(R, m_BranchOnCount(m_VPValue(), m_VPValue()));
608   (void)IsCondBranch;
609 
610   if (VPBB->getNumSuccessors() >= 2 ||
611       (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) {
612     assert(IsCondBranch && "block with multiple successors not terminated by "
613                            "conditional branch recipe");
614 
615     return true;
616   }
617 
618   assert(
619       !IsCondBranch &&
620       "block with 0 or 1 successors terminated by conditional branch recipe");
621   return false;
622 }
623 
624 VPRecipeBase *VPBasicBlock::getTerminator() {
625   if (hasConditionalTerminator(this))
626     return &back();
627   return nullptr;
628 }
629 
630 const VPRecipeBase *VPBasicBlock::getTerminator() const {
631   if (hasConditionalTerminator(this))
632     return &back();
633   return nullptr;
634 }
635 
636 bool VPBasicBlock::isExiting() const {
637   return getParent() && getParent()->getExitingBasicBlock() == this;
638 }
639 
640 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
641 void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
642   if (getSuccessors().empty()) {
643     O << Indent << "No successors\n";
644   } else {
645     O << Indent << "Successor(s): ";
646     ListSeparator LS;
647     for (auto *Succ : getSuccessors())
648       O << LS << Succ->getName();
649     O << '\n';
650   }
651 }
652 
653 void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
654                          VPSlotTracker &SlotTracker) const {
655   O << Indent << getName() << ":\n";
656 
657   auto RecipeIndent = Indent + "  ";
658   for (const VPRecipeBase &Recipe : *this) {
659     Recipe.print(O, RecipeIndent, SlotTracker);
660     O << '\n';
661   }
662 
663   printSuccessors(O, Indent);
664 }
665 #endif
666 
667 static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry);
668 
669 // Clone the CFG for all nodes reachable from \p Entry, this includes cloning
670 // the blocks and their recipes. Operands of cloned recipes will NOT be updated.
671 // Remapping of operands must be done separately. Returns a pair with the new
672 // entry and exiting blocks of the cloned region. If \p Entry isn't part of a
673 // region, return nullptr for the exiting block.
674 static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
675   DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks;
676   VPBlockBase *Exiting = nullptr;
677   bool InRegion = Entry->getParent();
678   // First, clone blocks reachable from Entry.
679   for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
680     VPBlockBase *NewBB = BB->clone();
681     Old2NewVPBlocks[BB] = NewBB;
682     if (InRegion && BB->getNumSuccessors() == 0) {
683       assert(!Exiting && "Multiple exiting blocks?");
684       Exiting = BB;
685     }
686   }
687   assert((!InRegion || Exiting) && "regions must have a single exiting block");
688 
689   // Second, update the predecessors & successors of the cloned blocks.
690   for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
691     VPBlockBase *NewBB = Old2NewVPBlocks[BB];
692     SmallVector<VPBlockBase *> NewPreds;
693     for (VPBlockBase *Pred : BB->getPredecessors()) {
694       NewPreds.push_back(Old2NewVPBlocks[Pred]);
695     }
696     NewBB->setPredecessors(NewPreds);
697     SmallVector<VPBlockBase *> NewSuccs;
698     for (VPBlockBase *Succ : BB->successors()) {
699       NewSuccs.push_back(Old2NewVPBlocks[Succ]);
700     }
701     NewBB->setSuccessors(NewSuccs);
702   }
703 
704 #if !defined(NDEBUG)
705   // Verify that the order of predecessors and successors matches in the cloned
706   // version.
707   for (const auto &[OldBB, NewBB] :
708        zip(vp_depth_first_shallow(Entry),
709            vp_depth_first_shallow(Old2NewVPBlocks[Entry]))) {
710     for (const auto &[OldPred, NewPred] :
711          zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
712       assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
713 
714     for (const auto &[OldSucc, NewSucc] :
715          zip(OldBB->successors(), NewBB->successors()))
716       assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
717   }
718 #endif
719 
720   return std::make_pair(Old2NewVPBlocks[Entry],
721                         Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
722 }
723 
724 VPRegionBlock *VPRegionBlock::clone() {
725   const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
726   auto *NewRegion =
727       new VPRegionBlock(NewEntry, NewExiting, getName(), isReplicator());
728   for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
729     Block->setParent(NewRegion);
730   return NewRegion;
731 }
732 
733 void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
734   for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
735     // Drop all references in VPBasicBlocks and replace all uses with
736     // DummyValue.
737     Block->dropAllReferences(NewValue);
738 }
739 
740 void VPRegionBlock::execute(VPTransformState *State) {
741   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
742       RPOT(Entry);
743 
744   if (!isReplicator()) {
745     // Create and register the new vector loop.
746     Loop *PrevLoop = State->CurrentVectorLoop;
747     State->CurrentVectorLoop = State->LI->AllocateLoop();
748     BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
749     Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
750 
751     // Insert the new loop into the loop nest and register the new basic blocks
752     // before calling any utilities such as SCEV that require valid LoopInfo.
753     if (ParentLoop)
754       ParentLoop->addChildLoop(State->CurrentVectorLoop);
755     else
756       State->LI->addTopLevelLoop(State->CurrentVectorLoop);
757 
758     // Visit the VPBlocks connected to "this", starting from it.
759     for (VPBlockBase *Block : RPOT) {
760       LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
761       Block->execute(State);
762     }
763 
764     State->CurrentVectorLoop = PrevLoop;
765     return;
766   }
767 
768   assert(!State->Instance && "Replicating a Region with non-null instance.");
769 
770   // Enter replicating mode.
771   State->Instance = VPIteration(0, 0);
772     assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
773     for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
774          ++Lane) {
775       State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
776       // Visit the VPBlocks connected to \p this, starting from it.
777       for (VPBlockBase *Block : RPOT) {
778         LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
779         Block->execute(State);
780       }
781   }
782 
783   // Exit replicating mode.
784   State->Instance.reset();
785 }
786 
787 InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) {
788   InstructionCost Cost = 0;
789   for (VPRecipeBase &R : Recipes)
790     Cost += R.cost(VF, Ctx);
791   return Cost;
792 }
793 
794 InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) {
795   if (!isReplicator()) {
796     InstructionCost Cost = 0;
797     for (VPBlockBase *Block : vp_depth_first_shallow(getEntry()))
798       Cost += Block->cost(VF, Ctx);
799     InstructionCost BackedgeCost =
800         ForceTargetInstructionCost.getNumOccurrences()
801             ? InstructionCost(ForceTargetInstructionCost.getNumOccurrences())
802             : Ctx.TTI.getCFInstrCost(Instruction::Br, TTI::TCK_RecipThroughput);
803     LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
804                       << ": vector loop backedge\n");
805     Cost += BackedgeCost;
806     return Cost;
807   }
808 
809   // Compute the cost of a replicate region. Replicating isn't supported for
810   // scalable vectors, return an invalid cost for them.
811   // TODO: Discard scalable VPlans with replicate recipes earlier after
812   // construction.
813   if (VF.isScalable())
814     return InstructionCost::getInvalid();
815 
816   // First compute the cost of the conditionally executed recipes, followed by
817   // account for the branching cost, except if the mask is a header mask or
818   // uniform condition.
819   using namespace llvm::VPlanPatternMatch;
820   VPBasicBlock *Then = cast<VPBasicBlock>(getEntry()->getSuccessors()[0]);
821   InstructionCost ThenCost = Then->cost(VF, Ctx);
822 
823   // For the scalar case, we may not always execute the original predicated
824   // block, Thus, scale the block's cost by the probability of executing it.
825   if (VF.isScalar())
826     return ThenCost / getReciprocalPredBlockProb();
827 
828   return ThenCost;
829 }
830 
831 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
832 void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
833                           VPSlotTracker &SlotTracker) const {
834   O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
835   auto NewIndent = Indent + "  ";
836   for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
837     O << '\n';
838     BlockBase->print(O, NewIndent, SlotTracker);
839   }
840   O << Indent << "}\n";
841 
842   printSuccessors(O, Indent);
843 }
844 #endif
845 
846 VPlan::~VPlan() {
847   for (auto &KV : LiveOuts)
848     delete KV.second;
849   LiveOuts.clear();
850 
851   if (Entry) {
852     VPValue DummyValue;
853     for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
854       Block->dropAllReferences(&DummyValue);
855 
856     VPBlockBase::deleteCFG(Entry);
857 
858     Preheader->dropAllReferences(&DummyValue);
859     delete Preheader;
860   }
861   for (VPValue *VPV : VPLiveInsToFree)
862     delete VPV;
863   if (BackedgeTakenCount)
864     delete BackedgeTakenCount;
865 }
866 
867 static VPIRBasicBlock *createVPIRBasicBlockFor(BasicBlock *BB) {
868   auto *VPIRBB = new VPIRBasicBlock(BB);
869   for (Instruction &I :
870        make_range(BB->begin(), BB->getTerminator()->getIterator()))
871     VPIRBB->appendRecipe(new VPIRInstruction(I));
872   return VPIRBB;
873 }
874 
875 VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
876                                    PredicatedScalarEvolution &PSE,
877                                    bool RequiresScalarEpilogueCheck,
878                                    bool TailFolded, Loop *TheLoop) {
879   VPIRBasicBlock *Entry = createVPIRBasicBlockFor(TheLoop->getLoopPreheader());
880   VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
881   auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);
882 
883   // Create SCEV and VPValue for the trip count.
884   const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
885   assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && "Invalid loop count");
886   ScalarEvolution &SE = *PSE.getSE();
887   const SCEV *TripCount =
888       SE.getTripCountFromExitCount(BackedgeTakenCount, InductionTy, TheLoop);
889   Plan->TripCount =
890       vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE);
891 
892   // Create VPRegionBlock, with empty header and latch blocks, to be filled
893   // during processing later.
894   VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body");
895   VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch");
896   VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB);
897   auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop",
898                                       false /*isReplicator*/);
899 
900   VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader);
901   VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block");
902   VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
903 
904   VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
905   if (!RequiresScalarEpilogueCheck) {
906     VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
907     return Plan;
908   }
909 
910   // If needed, add a check in the middle block to see if we have completed
911   // all of the iterations in the first vector loop.  Three cases:
912   // 1) If (N - N%VF) == N, then we *don't* need to run the remainder.
913   //    Thus if tail is to be folded, we know we don't need to run the
914   //    remainder and we can set the condition to true.
915   // 2) If we require a scalar epilogue, there is no conditional branch as
916   //    we unconditionally branch to the scalar preheader.  Do nothing.
917   // 3) Otherwise, construct a runtime check.
918   BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock();
919   auto *VPExitBlock = createVPIRBasicBlockFor(IRExitBlock);
920   // The connection order corresponds to the operands of the conditional branch.
921   VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB);
922   VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
923 
924   auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();
925   // Here we use the same DebugLoc as the scalar loop latch terminator instead
926   // of the corresponding compare because they may have ended up with
927   // different line numbers and we want to avoid awkward line stepping while
928   // debugging. Eg. if the compare has got a line number inside the loop.
929   VPBuilder Builder(MiddleVPBB);
930   VPValue *Cmp =
931       TailFolded
932           ? Plan->getOrAddLiveIn(ConstantInt::getTrue(
933                 IntegerType::getInt1Ty(TripCount->getType()->getContext())))
934           : Builder.createICmp(CmpInst::ICMP_EQ, Plan->getTripCount(),
935                                &Plan->getVectorTripCount(),
936                                ScalarLatchTerm->getDebugLoc(), "cmp.n");
937   Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
938                        ScalarLatchTerm->getDebugLoc());
939   return Plan;
940 }
941 
942 void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
943                              Value *CanonicalIVStartValue,
944                              VPTransformState &State) {
945   Type *TCTy = TripCountV->getType();
946   // Check if the backedge taken count is needed, and if so build it.
947   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
948     IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
949     auto *TCMO = Builder.CreateSub(TripCountV, ConstantInt::get(TCTy, 1),
950                                    "trip.count.minus.1");
951     BackedgeTakenCount->setUnderlyingValue(TCMO);
952   }
953 
954   VectorTripCount.setUnderlyingValue(VectorTripCountV);
955 
956   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
957   // FIXME: Model VF * UF computation completely in VPlan.
958   assert(VFxUF.getNumUsers() && "VFxUF expected to always have users");
959   unsigned UF = getUF();
960   if (VF.getNumUsers()) {
961     Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF);
962     VF.setUnderlyingValue(RuntimeVF);
963     VFxUF.setUnderlyingValue(
964         UF > 1 ? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, UF))
965                : RuntimeVF);
966   } else {
967     VFxUF.setUnderlyingValue(createStepForVF(Builder, TCTy, State.VF, UF));
968   }
969 
970   // When vectorizing the epilogue loop, the canonical induction start value
971   // needs to be changed from zero to the value after the main vector loop.
972   // FIXME: Improve modeling for canonical IV start values in the epilogue loop.
973   if (CanonicalIVStartValue) {
974     VPValue *VPV = getOrAddLiveIn(CanonicalIVStartValue);
975     auto *IV = getCanonicalIV();
976     assert(all_of(IV->users(),
977                   [](const VPUser *U) {
978                     return isa<VPScalarIVStepsRecipe>(U) ||
979                            isa<VPScalarCastRecipe>(U) ||
980                            isa<VPDerivedIVRecipe>(U) ||
981                            cast<VPInstruction>(U)->getOpcode() ==
982                                Instruction::Add;
983                   }) &&
984            "the canonical IV should only be used by its increment or "
985            "ScalarIVSteps when resetting the start value");
986     IV->setOperand(0, VPV);
987   }
988 }
989 
990 /// Replace \p VPBB with a VPIRBasicBlock wrapping \p IRBB. All recipes from \p
991 /// VPBB are moved to the end of the newly created VPIRBasicBlock. VPBB must
992 /// have a single predecessor, which is rewired to the new VPIRBasicBlock. All
993 /// successors of VPBB, if any, are rewired to the new VPIRBasicBlock.
994 static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
995   VPIRBasicBlock *IRVPBB = createVPIRBasicBlockFor(IRBB);
996   for (auto &R : make_early_inc_range(*VPBB)) {
997     assert(!R.isPhi() && "Tried to move phi recipe to end of block");
998     R.moveBefore(*IRVPBB, IRVPBB->end());
999   }
1000   VPBlockBase *PredVPBB = VPBB->getSinglePredecessor();
1001   VPBlockUtils::disconnectBlocks(PredVPBB, VPBB);
1002   VPBlockUtils::connectBlocks(PredVPBB, IRVPBB);
1003   for (auto *Succ : to_vector(VPBB->getSuccessors())) {
1004     VPBlockUtils::connectBlocks(IRVPBB, Succ);
1005     VPBlockUtils::disconnectBlocks(VPBB, Succ);
1006   }
1007   delete VPBB;
1008 }
1009 
1010 /// Generate the code inside the preheader and body of the vectorized loop.
1011 /// Assumes a single pre-header basic-block was created for this. Introduce
1012 /// additional basic-blocks as needed, and fill them all.
1013 void VPlan::execute(VPTransformState *State) {
1014   // Initialize CFG state.
1015   State->CFG.PrevVPBB = nullptr;
1016   State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
1017   BasicBlock *VectorPreHeader = State->CFG.PrevBB;
1018   State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
1019 
1020   // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
1021   cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
1022   State->CFG.DTU.applyUpdates(
1023       {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
1024 
1025   // Replace regular VPBB's for the middle and scalar preheader blocks with
1026   // VPIRBasicBlocks wrapping their IR blocks. The IR blocks are created during
1027   // skeleton creation, so we can only create the VPIRBasicBlocks now during
1028   // VPlan execution rather than earlier during VPlan construction.
1029   BasicBlock *MiddleBB = State->CFG.ExitBB;
1030   VPBasicBlock *MiddleVPBB =
1031       cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
1032   // Find the VPBB for the scalar preheader, relying on the current structure
1033   // when creating the middle block and its successrs: if there's a single
1034   // predecessor, it must be the scalar preheader. Otherwise, the second
1035   // successor is the scalar preheader.
1036   BasicBlock *ScalarPh = MiddleBB->getSingleSuccessor();
1037   auto &MiddleSuccs = MiddleVPBB->getSuccessors();
1038   assert((MiddleSuccs.size() == 1 || MiddleSuccs.size() == 2) &&
1039          "middle block has unexpected successors");
1040   VPBasicBlock *ScalarPhVPBB = cast<VPBasicBlock>(
1041       MiddleSuccs.size() == 1 ? MiddleSuccs[0] : MiddleSuccs[1]);
1042   assert(!isa<VPIRBasicBlock>(ScalarPhVPBB) &&
1043          "scalar preheader cannot be wrapped already");
1044   replaceVPBBWithIRVPBB(ScalarPhVPBB, ScalarPh);
1045   replaceVPBBWithIRVPBB(MiddleVPBB, MiddleBB);
1046 
1047   // Disconnect the middle block from its single successor (the scalar loop
1048   // header) in both the CFG and DT. The branch will be recreated during VPlan
1049   // execution.
1050   auto *BrInst = new UnreachableInst(MiddleBB->getContext());
1051   BrInst->insertBefore(MiddleBB->getTerminator());
1052   MiddleBB->getTerminator()->eraseFromParent();
1053   State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
1054 
1055   // Generate code in the loop pre-header and body.
1056   for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
1057     Block->execute(State);
1058 
1059   VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
1060   BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1061 
1062   // Fix the latch value of canonical, reduction and first-order recurrences
1063   // phis in the vector loop.
1064   VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
1065   for (VPRecipeBase &R : Header->phis()) {
1066     // Skip phi-like recipes that generate their backedege values themselves.
1067     if (isa<VPWidenPHIRecipe>(&R))
1068       continue;
1069 
1070     if (isa<VPWidenPointerInductionRecipe>(&R) ||
1071         isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1072       PHINode *Phi = nullptr;
1073       if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1074         Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
1075       } else {
1076         auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1077         assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1078                "recipe generating only scalars should have been replaced");
1079         auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1080         Phi = cast<PHINode>(GEP->getPointerOperand());
1081       }
1082 
1083       Phi->setIncomingBlock(1, VectorLatchBB);
1084 
1085       // Move the last step to the end of the latch block. This ensures
1086       // consistent placement of all induction updates.
1087       Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1088       Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
1089 
1090       // Use the steps for the last part as backedge value for the induction.
1091       if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1092         Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1093       continue;
1094     }
1095 
1096     auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1097     bool NeedsScalar =
1098         isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1099         (isa<VPReductionPHIRecipe>(PhiR) &&
1100          cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1101     Value *Phi = State->get(PhiR, NeedsScalar);
1102     Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
1103     cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1104   }
1105 
1106   State->CFG.DTU.flush();
1107   assert(State->CFG.DTU.getDomTree().verify(
1108              DominatorTree::VerificationLevel::Fast) &&
1109          "DT not preserved correctly");
1110 }
1111 
1112 InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
1113   // For now only return the cost of the vector loop region, ignoring any other
1114   // blocks, like the preheader or middle blocks.
1115   return getVectorLoopRegion()->cost(VF, Ctx);
1116 }
1117 
1118 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1119 void VPlan::printLiveIns(raw_ostream &O) const {
1120   VPSlotTracker SlotTracker(this);
1121 
1122   if (VF.getNumUsers() > 0) {
1123     O << "\nLive-in ";
1124     VF.printAsOperand(O, SlotTracker);
1125     O << " = VF";
1126   }
1127 
1128   if (VFxUF.getNumUsers() > 0) {
1129     O << "\nLive-in ";
1130     VFxUF.printAsOperand(O, SlotTracker);
1131     O << " = VF * UF";
1132   }
1133 
1134   if (VectorTripCount.getNumUsers() > 0) {
1135     O << "\nLive-in ";
1136     VectorTripCount.printAsOperand(O, SlotTracker);
1137     O << " = vector-trip-count";
1138   }
1139 
1140   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1141     O << "\nLive-in ";
1142     BackedgeTakenCount->printAsOperand(O, SlotTracker);
1143     O << " = backedge-taken count";
1144   }
1145 
1146   O << "\n";
1147   if (TripCount->isLiveIn())
1148     O << "Live-in ";
1149   TripCount->printAsOperand(O, SlotTracker);
1150   O << " = original trip-count";
1151   O << "\n";
1152 }
1153 
1154 LLVM_DUMP_METHOD
1155 void VPlan::print(raw_ostream &O) const {
1156   VPSlotTracker SlotTracker(this);
1157 
1158   O << "VPlan '" << getName() << "' {";
1159 
1160   printLiveIns(O);
1161 
1162   if (!getPreheader()->empty()) {
1163     O << "\n";
1164     getPreheader()->print(O, "", SlotTracker);
1165   }
1166 
1167   for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) {
1168     O << '\n';
1169     Block->print(O, "", SlotTracker);
1170   }
1171 
1172   if (!LiveOuts.empty())
1173     O << "\n";
1174   for (const auto &KV : LiveOuts) {
1175     KV.second->print(O, SlotTracker);
1176   }
1177 
1178   O << "}\n";
1179 }
1180 
1181 std::string VPlan::getName() const {
1182   std::string Out;
1183   raw_string_ostream RSO(Out);
1184   RSO << Name << " for ";
1185   if (!VFs.empty()) {
1186     RSO << "VF={" << VFs[0];
1187     for (ElementCount VF : drop_begin(VFs))
1188       RSO << "," << VF;
1189     RSO << "},";
1190   }
1191 
1192   if (UFs.empty()) {
1193     RSO << "UF>=1";
1194   } else {
1195     RSO << "UF={" << UFs[0];
1196     for (unsigned UF : drop_begin(UFs))
1197       RSO << "," << UF;
1198     RSO << "}";
1199   }
1200 
1201   return Out;
1202 }
1203 
1204 LLVM_DUMP_METHOD
1205 void VPlan::printDOT(raw_ostream &O) const {
1206   VPlanPrinter Printer(O, *this);
1207   Printer.dump();
1208 }
1209 
1210 LLVM_DUMP_METHOD
1211 void VPlan::dump() const { print(dbgs()); }
1212 #endif
1213 
1214 void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
1215   assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
1216   LiveOuts.insert({PN, new VPLiveOut(PN, V)});
1217 }
1218 
1219 static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
1220                           DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
1221   // Update the operands of all cloned recipes starting at NewEntry. This
1222   // traverses all reachable blocks. This is done in two steps, to handle cycles
1223   // in PHI recipes.
1224   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
1225       OldDeepRPOT(Entry);
1226   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
1227       NewDeepRPOT(NewEntry);
1228   // First, collect all mappings from old to new VPValues defined by cloned
1229   // recipes.
1230   for (const auto &[OldBB, NewBB] :
1231        zip(VPBlockUtils::blocksOnly<VPBasicBlock>(OldDeepRPOT),
1232            VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT))) {
1233     assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
1234            "blocks must have the same number of recipes");
1235     for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
1236       assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1237              "recipes must have the same number of operands");
1238       assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1239              "recipes must define the same number of operands");
1240       for (const auto &[OldV, NewV] :
1241            zip(OldR.definedValues(), NewR.definedValues()))
1242         Old2NewVPValues[OldV] = NewV;
1243     }
1244   }
1245 
1246   // Update all operands to use cloned VPValues.
1247   for (VPBasicBlock *NewBB :
1248        VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT)) {
1249     for (VPRecipeBase &NewR : *NewBB)
1250       for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1251         VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
1252         NewR.setOperand(I, NewOp);
1253       }
1254   }
1255 }
1256 
1257 VPlan *VPlan::duplicate() {
1258   // Clone blocks.
1259   VPBasicBlock *NewPreheader = Preheader->clone();
1260   const auto &[NewEntry, __] = cloneFrom(Entry);
1261 
1262   // Create VPlan, clone live-ins and remap operands in the cloned blocks.
1263   auto *NewPlan = new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry));
1264   DenseMap<VPValue *, VPValue *> Old2NewVPValues;
1265   for (VPValue *OldLiveIn : VPLiveInsToFree) {
1266     Old2NewVPValues[OldLiveIn] =
1267         NewPlan->getOrAddLiveIn(OldLiveIn->getLiveInIRValue());
1268   }
1269   Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
1270   Old2NewVPValues[&VF] = &NewPlan->VF;
1271   Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
1272   if (BackedgeTakenCount) {
1273     NewPlan->BackedgeTakenCount = new VPValue();
1274     Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
1275   }
1276   assert(TripCount && "trip count must be set");
1277   if (TripCount->isLiveIn())
1278     Old2NewVPValues[TripCount] =
1279         NewPlan->getOrAddLiveIn(TripCount->getLiveInIRValue());
1280   // else NewTripCount will be created and inserted into Old2NewVPValues when
1281   // TripCount is cloned. In any case NewPlan->TripCount is updated below.
1282 
1283   remapOperands(Preheader, NewPreheader, Old2NewVPValues);
1284   remapOperands(Entry, NewEntry, Old2NewVPValues);
1285 
1286   // Clone live-outs.
1287   for (const auto &[_, LO] : LiveOuts)
1288     NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
1289 
1290   // Initialize remaining fields of cloned VPlan.
1291   NewPlan->VFs = VFs;
1292   NewPlan->UFs = UFs;
1293   // TODO: Adjust names.
1294   NewPlan->Name = Name;
1295   assert(Old2NewVPValues.contains(TripCount) &&
1296          "TripCount must have been added to Old2NewVPValues");
1297   NewPlan->TripCount = Old2NewVPValues[TripCount];
1298   return NewPlan;
1299 }
1300 
1301 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1302 
1303 Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
1304   return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
1305          Twine(getOrCreateBID(Block));
1306 }
1307 
1308 Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
1309   const std::string &Name = Block->getName();
1310   if (!Name.empty())
1311     return Name;
1312   return "VPB" + Twine(getOrCreateBID(Block));
1313 }
1314 
1315 void VPlanPrinter::dump() {
1316   Depth = 1;
1317   bumpIndent(0);
1318   OS << "digraph VPlan {\n";
1319   OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1320   if (!Plan.getName().empty())
1321     OS << "\\n" << DOT::EscapeString(Plan.getName());
1322 
1323   {
1324     // Print live-ins.
1325   std::string Str;
1326   raw_string_ostream SS(Str);
1327   Plan.printLiveIns(SS);
1328   SmallVector<StringRef, 0> Lines;
1329   StringRef(Str).rtrim('\n').split(Lines, "\n");
1330   for (auto Line : Lines)
1331     OS << DOT::EscapeString(Line.str()) << "\\n";
1332   }
1333 
1334   OS << "\"]\n";
1335   OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1336   OS << "edge [fontname=Courier, fontsize=30]\n";
1337   OS << "compound=true\n";
1338 
1339   dumpBlock(Plan.getPreheader());
1340 
1341   for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
1342     dumpBlock(Block);
1343 
1344   OS << "}\n";
1345 }
1346 
1347 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
1348   if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
1349     dumpBasicBlock(BasicBlock);
1350   else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1351     dumpRegion(Region);
1352   else
1353     llvm_unreachable("Unsupported kind of VPBlock.");
1354 }
1355 
1356 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
1357                             bool Hidden, const Twine &Label) {
1358   // Due to "dot" we print an edge between two regions as an edge between the
1359   // exiting basic block and the entry basic of the respective regions.
1360   const VPBlockBase *Tail = From->getExitingBasicBlock();
1361   const VPBlockBase *Head = To->getEntryBasicBlock();
1362   OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1363   OS << " [ label=\"" << Label << '\"';
1364   if (Tail != From)
1365     OS << " ltail=" << getUID(From);
1366   if (Head != To)
1367     OS << " lhead=" << getUID(To);
1368   if (Hidden)
1369     OS << "; splines=none";
1370   OS << "]\n";
1371 }
1372 
1373 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
1374   auto &Successors = Block->getSuccessors();
1375   if (Successors.size() == 1)
1376     drawEdge(Block, Successors.front(), false, "");
1377   else if (Successors.size() == 2) {
1378     drawEdge(Block, Successors.front(), false, "T");
1379     drawEdge(Block, Successors.back(), false, "F");
1380   } else {
1381     unsigned SuccessorNumber = 0;
1382     for (auto *Successor : Successors)
1383       drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
1384   }
1385 }
1386 
1387 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
1388   // Implement dot-formatted dump by performing plain-text dump into the
1389   // temporary storage followed by some post-processing.
1390   OS << Indent << getUID(BasicBlock) << " [label =\n";
1391   bumpIndent(1);
1392   std::string Str;
1393   raw_string_ostream SS(Str);
1394   // Use no indentation as we need to wrap the lines into quotes ourselves.
1395   BasicBlock->print(SS, "", SlotTracker);
1396 
1397   // We need to process each line of the output separately, so split
1398   // single-string plain-text dump.
1399   SmallVector<StringRef, 0> Lines;
1400   StringRef(Str).rtrim('\n').split(Lines, "\n");
1401 
1402   auto EmitLine = [&](StringRef Line, StringRef Suffix) {
1403     OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
1404   };
1405 
1406   // Don't need the "+" after the last line.
1407   for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
1408     EmitLine(Line, " +\n");
1409   EmitLine(Lines.back(), "\n");
1410 
1411   bumpIndent(-1);
1412   OS << Indent << "]\n";
1413 
1414   dumpEdges(BasicBlock);
1415 }
1416 
1417 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
1418   OS << Indent << "subgraph " << getUID(Region) << " {\n";
1419   bumpIndent(1);
1420   OS << Indent << "fontname=Courier\n"
1421      << Indent << "label=\""
1422      << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
1423      << DOT::EscapeString(Region->getName()) << "\"\n";
1424   // Dump the blocks of the region.
1425   assert(Region->getEntry() && "Region contains no inner blocks.");
1426   for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry()))
1427     dumpBlock(Block);
1428   bumpIndent(-1);
1429   OS << Indent << "}\n";
1430   dumpEdges(Region);
1431 }
1432 
1433 void VPlanIngredient::print(raw_ostream &O) const {
1434   if (auto *Inst = dyn_cast<Instruction>(V)) {
1435     if (!Inst->getType()->isVoidTy()) {
1436       Inst->printAsOperand(O, false);
1437       O << " = ";
1438     }
1439     O << Inst->getOpcodeName() << " ";
1440     unsigned E = Inst->getNumOperands();
1441     if (E > 0) {
1442       Inst->getOperand(0)->printAsOperand(O, false);
1443       for (unsigned I = 1; I < E; ++I)
1444         Inst->getOperand(I)->printAsOperand(O << ", ", false);
1445     }
1446   } else // !Inst
1447     V->printAsOperand(O, false);
1448 }
1449 
1450 #endif
1451 
1452 bool VPValue::isDefinedOutsideLoopRegions() const {
1453   return !hasDefiningRecipe() ||
1454          !getDefiningRecipe()->getParent()->getEnclosingLoopRegion();
1455 }
1456 
1457 void VPValue::replaceAllUsesWith(VPValue *New) {
1458   replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
1459 }
1460 
1461 void VPValue::replaceUsesWithIf(
1462     VPValue *New,
1463     llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) {
1464   // Note that this early exit is required for correctness; the implementation
1465   // below relies on the number of users for this VPValue to decrease, which
1466   // isn't the case if this == New.
1467   if (this == New)
1468     return;
1469 
1470   for (unsigned J = 0; J < getNumUsers();) {
1471     VPUser *User = Users[J];
1472     bool RemovedUser = false;
1473     for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
1474       if (User->getOperand(I) != this || !ShouldReplace(*User, I))
1475         continue;
1476 
1477       RemovedUser = true;
1478       User->setOperand(I, New);
1479     }
1480     // If a user got removed after updating the current user, the next user to
1481     // update will be moved to the current position, so we only need to
1482     // increment the index if the number of users did not change.
1483     if (!RemovedUser)
1484       J++;
1485   }
1486 }
1487 
1488 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1489 void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
1490   OS << Tracker.getOrCreateName(this);
1491 }
1492 
1493 void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
1494   interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1495     Op->printAsOperand(O, SlotTracker);
1496   });
1497 }
1498 #endif
1499 
1500 void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
1501                                           Old2NewTy &Old2New,
1502                                           InterleavedAccessInfo &IAI) {
1503   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
1504       RPOT(Region->getEntry());
1505   for (VPBlockBase *Base : RPOT) {
1506     visitBlock(Base, Old2New, IAI);
1507   }
1508 }
1509 
1510 void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1511                                          InterleavedAccessInfo &IAI) {
1512   if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
1513     for (VPRecipeBase &VPI : *VPBB) {
1514       if (isa<VPWidenPHIRecipe>(&VPI))
1515         continue;
1516       assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
1517       auto *VPInst = cast<VPInstruction>(&VPI);
1518 
1519       auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
1520       if (!Inst)
1521         continue;
1522       auto *IG = IAI.getInterleaveGroup(Inst);
1523       if (!IG)
1524         continue;
1525 
1526       auto NewIGIter = Old2New.find(IG);
1527       if (NewIGIter == Old2New.end())
1528         Old2New[IG] = new InterleaveGroup<VPInstruction>(
1529             IG->getFactor(), IG->isReverse(), IG->getAlign());
1530 
1531       if (Inst == IG->getInsertPos())
1532         Old2New[IG]->setInsertPos(VPInst);
1533 
1534       InterleaveGroupMap[VPInst] = Old2New[IG];
1535       InterleaveGroupMap[VPInst]->insertMember(
1536           VPInst, IG->getIndex(Inst),
1537           Align(IG->isReverse() ? (-1) * int(IG->getFactor())
1538                                 : IG->getFactor()));
1539     }
1540   } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1541     visitRegion(Region, Old2New, IAI);
1542   else
1543     llvm_unreachable("Unsupported kind of VPBlock.");
1544 }
1545 
1546 VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
1547                                                  InterleavedAccessInfo &IAI) {
1548   Old2NewTy Old2New;
1549   visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
1550 }
1551 
1552 void VPSlotTracker::assignName(const VPValue *V) {
1553   assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
1554   auto *UV = V->getUnderlyingValue();
1555   if (!UV) {
1556     VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str();
1557     NextSlot++;
1558     return;
1559   }
1560 
1561   // Use the name of the underlying Value, wrapped in "ir<>", and versioned by
1562   // appending ".Number" to the name if there are multiple uses.
1563   std::string Name;
1564   raw_string_ostream S(Name);
1565   UV->printAsOperand(S, false);
1566   assert(!Name.empty() && "Name cannot be empty.");
1567   std::string BaseName = (Twine("ir<") + Name + Twine(">")).str();
1568 
1569   // First assign the base name for V.
1570   const auto &[A, _] = VPValue2Name.insert({V, BaseName});
1571   // Integer or FP constants with different types will result in he same string
1572   // due to stripping types.
1573   if (V->isLiveIn() && isa<ConstantInt, ConstantFP>(UV))
1574     return;
1575 
1576   // If it is already used by C > 0 other VPValues, increase the version counter
1577   // C and use it for V.
1578   const auto &[C, UseInserted] = BaseName2Version.insert({BaseName, 0});
1579   if (!UseInserted) {
1580     C->second++;
1581     A->second = (BaseName + Twine(".") + Twine(C->second)).str();
1582   }
1583 }
1584 
1585 void VPSlotTracker::assignNames(const VPlan &Plan) {
1586   if (Plan.VF.getNumUsers() > 0)
1587     assignName(&Plan.VF);
1588   if (Plan.VFxUF.getNumUsers() > 0)
1589     assignName(&Plan.VFxUF);
1590   assignName(&Plan.VectorTripCount);
1591   if (Plan.BackedgeTakenCount)
1592     assignName(Plan.BackedgeTakenCount);
1593   for (VPValue *LI : Plan.VPLiveInsToFree)
1594     assignName(LI);
1595   assignNames(Plan.getPreheader());
1596 
1597   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1598       RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
1599   for (const VPBasicBlock *VPBB :
1600        VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
1601     assignNames(VPBB);
1602 }
1603 
1604 void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
1605   for (const VPRecipeBase &Recipe : *VPBB)
1606     for (VPValue *Def : Recipe.definedValues())
1607       assignName(Def);
1608 }
1609 
1610 std::string VPSlotTracker::getOrCreateName(const VPValue *V) const {
1611   std::string Name = VPValue2Name.lookup(V);
1612   if (!Name.empty())
1613     return Name;
1614 
1615   // If no name was assigned, no VPlan was provided when creating the slot
1616   // tracker or it is not reachable from the provided VPlan. This can happen,
1617   // e.g. when trying to print a recipe that has not been inserted into a VPlan
1618   // in a debugger.
1619   // TODO: Update VPSlotTracker constructor to assign names to recipes &
1620   // VPValues not associated with a VPlan, instead of constructing names ad-hoc
1621   // here.
1622   const VPRecipeBase *DefR = V->getDefiningRecipe();
1623   (void)DefR;
1624   assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) &&
1625          "VPValue defined by a recipe in a VPlan?");
1626 
1627   // Use the underlying value's name, if there is one.
1628   if (auto *UV = V->getUnderlyingValue()) {
1629     std::string Name;
1630     raw_string_ostream S(Name);
1631     UV->printAsOperand(S, false);
1632     return (Twine("ir<") + Name + ">").str();
1633   }
1634 
1635   return "<badref>";
1636 }
1637 
1638 bool LoopVectorizationPlanner::getDecisionAndClampRange(
1639     const std::function<bool(ElementCount)> &Predicate, VFRange &Range) {
1640   assert(!Range.isEmpty() && "Trying to test an empty VF range.");
1641   bool PredicateAtRangeStart = Predicate(Range.Start);
1642 
1643   for (ElementCount TmpVF : VFRange(Range.Start * 2, Range.End))
1644     if (Predicate(TmpVF) != PredicateAtRangeStart) {
1645       Range.End = TmpVF;
1646       break;
1647     }
1648 
1649   return PredicateAtRangeStart;
1650 }
1651 
1652 /// Build VPlans for the full range of feasible VF's = {\p MinVF, 2 * \p MinVF,
1653 /// 4 * \p MinVF, ..., \p MaxVF} by repeatedly building a VPlan for a sub-range
1654 /// of VF's starting at a given VF and extending it as much as possible. Each
1655 /// vectorization decision can potentially shorten this sub-range during
1656 /// buildVPlan().
1657 void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
1658                                            ElementCount MaxVF) {
1659   auto MaxVFTimes2 = MaxVF * 2;
1660   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
1661     VFRange SubRange = {VF, MaxVFTimes2};
1662     auto Plan = buildVPlan(SubRange);
1663     VPlanTransforms::optimize(*Plan);
1664     VPlans.push_back(std::move(Plan));
1665     VF = SubRange.End;
1666   }
1667 }
1668 
1669 VPlan &LoopVectorizationPlanner::getPlanFor(ElementCount VF) const {
1670   assert(count_if(VPlans,
1671                   [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
1672              1 &&
1673          "Multiple VPlans for VF.");
1674 
1675   for (const VPlanPtr &Plan : VPlans) {
1676     if (Plan->hasVF(VF))
1677       return *Plan.get();
1678   }
1679   llvm_unreachable("No plan found!");
1680 }
1681 
1682 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1683 void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
1684   if (VPlans.empty()) {
1685     O << "LV: No VPlans built.\n";
1686     return;
1687   }
1688   for (const auto &Plan : VPlans)
1689     if (PrintVPlansInDotFormat)
1690       Plan->printDOT(O);
1691     else
1692       Plan->print(O);
1693 }
1694 #endif
1695