xref: /llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp (revision bdada7546e6b4a189a22c7ba9ce2d1b507b9c22e)
1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This is the LLVM vectorization plan. It represents a candidate for
11 /// vectorization, allowing to plan and optimize how to vectorize a given loop
12 /// before generating LLVM-IR.
13 /// The vectorizer uses vectorization plans to estimate the costs of potential
14 /// candidates and if profitable to execute the desired plan, generating vector
15 /// LLVM-IR code.
16 ///
17 //===----------------------------------------------------------------------===//
18 
19 #include "VPlan.h"
20 #include "VPlanDominatorTree.h"
21 #include "llvm/ADT/DepthFirstIterator.h"
22 #include "llvm/ADT/PostOrderIterator.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Twine.h"
26 #include "llvm/Analysis/IVDescriptors.h"
27 #include "llvm/Analysis/LoopInfo.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/CFG.h"
30 #include "llvm/IR/InstrTypes.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/GenericDomTreeConstruction.h"
40 #include "llvm/Support/GraphWriter.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
43 #include <cassert>
44 #include <iterator>
45 #include <string>
46 #include <vector>
47 
48 using namespace llvm;
49 extern cl::opt<bool> EnableVPlanNativePath;
50 
51 #define DEBUG_TYPE "vplan"
52 
53 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
54 raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
55   const VPInstruction *Instr = dyn_cast<VPInstruction>(&V);
56   VPSlotTracker SlotTracker(
57       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
58   V.print(OS, SlotTracker);
59   return OS;
60 }
61 #endif
62 
63 Value *VPLane::getAsRuntimeExpr(IRBuilder<> &Builder,
64                                 const ElementCount &VF) const {
65   switch (LaneKind) {
66   case VPLane::Kind::ScalableLast:
67     // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
68     return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
69                              Builder.getInt32(VF.getKnownMinValue() - Lane));
70   case VPLane::Kind::First:
71     return Builder.getInt32(Lane);
72   }
73   llvm_unreachable("Unknown lane kind");
74 }
75 
76 VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
77     : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
78   if (Def)
79     Def->addDefinedValue(this);
80 }
81 
82 VPValue::~VPValue() {
83   assert(Users.empty() && "trying to delete a VPValue with remaining users");
84   if (Def)
85     Def->removeDefinedValue(this);
86 }
87 
88 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
89 void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
90   if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
91     R->print(OS, "", SlotTracker);
92   else
93     printAsOperand(OS, SlotTracker);
94 }
95 
96 void VPValue::dump() const {
97   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
98   VPSlotTracker SlotTracker(
99       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
100   print(dbgs(), SlotTracker);
101   dbgs() << "\n";
102 }
103 
104 void VPDef::dump() const {
105   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
106   VPSlotTracker SlotTracker(
107       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
108   print(dbgs(), "", SlotTracker);
109   dbgs() << "\n";
110 }
111 #endif
112 
113 // Get the top-most entry block of \p Start. This is the entry block of the
114 // containing VPlan. This function is templated to support both const and non-const blocks
115 template <typename T> static T *getPlanEntry(T *Start) {
116   T *Next = Start;
117   T *Current = Start;
118   while ((Next = Next->getParent()))
119     Current = Next;
120 
121   SmallSetVector<T *, 8> WorkList;
122   WorkList.insert(Current);
123 
124   for (unsigned i = 0; i < WorkList.size(); i++) {
125     T *Current = WorkList[i];
126     if (Current->getNumPredecessors() == 0)
127       return Current;
128     auto &Predecessors = Current->getPredecessors();
129     WorkList.insert(Predecessors.begin(), Predecessors.end());
130   }
131 
132   llvm_unreachable("VPlan without any entry node without predecessors");
133 }
134 
135 VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
136 
137 const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
138 
139 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
140 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
141   const VPBlockBase *Block = this;
142   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
143     Block = Region->getEntry();
144   return cast<VPBasicBlock>(Block);
145 }
146 
147 VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
148   VPBlockBase *Block = this;
149   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
150     Block = Region->getEntry();
151   return cast<VPBasicBlock>(Block);
152 }
153 
154 void VPBlockBase::setPlan(VPlan *ParentPlan) {
155   assert(ParentPlan->getEntry() == this &&
156          "Can only set plan on its entry block.");
157   Plan = ParentPlan;
158 }
159 
160 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
161 const VPBasicBlock *VPBlockBase::getExitBasicBlock() const {
162   const VPBlockBase *Block = this;
163   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
164     Block = Region->getExit();
165   return cast<VPBasicBlock>(Block);
166 }
167 
168 VPBasicBlock *VPBlockBase::getExitBasicBlock() {
169   VPBlockBase *Block = this;
170   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
171     Block = Region->getExit();
172   return cast<VPBasicBlock>(Block);
173 }
174 
175 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
176   if (!Successors.empty() || !Parent)
177     return this;
178   assert(Parent->getExit() == this &&
179          "Block w/o successors not the exit of its parent.");
180   return Parent->getEnclosingBlockWithSuccessors();
181 }
182 
183 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
184   if (!Predecessors.empty() || !Parent)
185     return this;
186   assert(Parent->getEntry() == this &&
187          "Block w/o predecessors not the entry of its parent.");
188   return Parent->getEnclosingBlockWithPredecessors();
189 }
190 
191 static VPValue *getSingleOperandOrNull(VPUser &U) {
192   if (U.getNumOperands() == 1)
193     return U.getOperand(0);
194 
195   return nullptr;
196 }
197 
198 static const VPValue *getSingleOperandOrNull(const VPUser &U) {
199   if (U.getNumOperands() == 1)
200     return U.getOperand(0);
201 
202   return nullptr;
203 }
204 
205 static void resetSingleOpUser(VPUser &U, VPValue *NewVal) {
206   assert(U.getNumOperands() <= 1 && "Didn't expect more than one operand!");
207   if (!NewVal) {
208     if (U.getNumOperands() == 1)
209       U.removeLastOperand();
210     return;
211   }
212 
213   if (U.getNumOperands() == 1)
214     U.setOperand(0, NewVal);
215   else
216     U.addOperand(NewVal);
217 }
218 
219 VPValue *VPBlockBase::getCondBit() {
220   return getSingleOperandOrNull(CondBitUser);
221 }
222 
223 const VPValue *VPBlockBase::getCondBit() const {
224   return getSingleOperandOrNull(CondBitUser);
225 }
226 
227 void VPBlockBase::setCondBit(VPValue *CV) {
228   resetSingleOpUser(CondBitUser, CV);
229 }
230 
231 VPValue *VPBlockBase::getPredicate() {
232   return getSingleOperandOrNull(PredicateUser);
233 }
234 
235 const VPValue *VPBlockBase::getPredicate() const {
236   return getSingleOperandOrNull(PredicateUser);
237 }
238 
239 void VPBlockBase::setPredicate(VPValue *CV) {
240   resetSingleOpUser(PredicateUser, CV);
241 }
242 
243 void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
244   SmallVector<VPBlockBase *, 8> Blocks(depth_first(Entry));
245 
246   for (VPBlockBase *Block : Blocks)
247     delete Block;
248 }
249 
250 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
251   iterator It = begin();
252   while (It != end() && It->isPhi())
253     It++;
254   return It;
255 }
256 
257 Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
258   if (!Def->getDef())
259     return Def->getLiveInIRValue();
260 
261   if (hasScalarValue(Def, Instance)) {
262     return Data
263         .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)];
264   }
265 
266   assert(hasVectorValue(Def, Instance.Part));
267   auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
268   if (!VecPart->getType()->isVectorTy()) {
269     assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
270     return VecPart;
271   }
272   // TODO: Cache created scalar values.
273   Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
274   auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
275   // set(Def, Extract, Instance);
276   return Extract;
277 }
278 
279 BasicBlock *
280 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
281   // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
282   // Pred stands for Predessor. Prev stands for Previous - last visited/created.
283   BasicBlock *PrevBB = CFG.PrevBB;
284   BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
285                                          PrevBB->getParent(), CFG.LastBB);
286   LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
287 
288   // Hook up the new basic block to its predecessors.
289   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
290     VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock();
291     auto &PredVPSuccessors = PredVPBB->getSuccessors();
292     BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
293 
294     // In outer loop vectorization scenario, the predecessor BBlock may not yet
295     // be visited(backedge). Mark the VPBasicBlock for fixup at the end of
296     // vectorization. We do not encounter this case in inner loop vectorization
297     // as we start out by building a loop skeleton with the vector loop header
298     // and latch blocks. As a result, we never enter this function for the
299     // header block in the non VPlan-native path.
300     if (!PredBB) {
301       assert(EnableVPlanNativePath &&
302              "Unexpected null predecessor in non VPlan-native path");
303       CFG.VPBBsToFix.push_back(PredVPBB);
304       continue;
305     }
306 
307     assert(PredBB && "Predecessor basic-block not found building successor.");
308     auto *PredBBTerminator = PredBB->getTerminator();
309     LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
310     if (isa<UnreachableInst>(PredBBTerminator)) {
311       assert(PredVPSuccessors.size() == 1 &&
312              "Predecessor ending w/o branch must have single successor.");
313       PredBBTerminator->eraseFromParent();
314       BranchInst::Create(NewBB, PredBB);
315     } else {
316       assert(PredVPSuccessors.size() == 2 &&
317              "Predecessor ending with branch must have two successors.");
318       unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
319       assert(!PredBBTerminator->getSuccessor(idx) &&
320              "Trying to reset an existing successor block.");
321       PredBBTerminator->setSuccessor(idx, NewBB);
322     }
323   }
324   return NewBB;
325 }
326 
327 void VPBasicBlock::execute(VPTransformState *State) {
328   bool Replica = State->Instance && !State->Instance->isFirstIteration();
329   VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
330   VPBlockBase *SingleHPred = nullptr;
331   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
332 
333   // 1. Create an IR basic block, or reuse the last one if possible.
334   // The last IR basic block is reused, as an optimization, in three cases:
335   // A. the first VPBB reuses the loop header BB - when PrevVPBB is null;
336   // B. when the current VPBB has a single (hierarchical) predecessor which
337   //    is PrevVPBB and the latter has a single (hierarchical) successor; and
338   // C. when the current VPBB is an entry of a region replica - where PrevVPBB
339   //    is the exit of this region from a previous instance, or the predecessor
340   //    of this region.
341   if (PrevVPBB && /* A */
342       !((SingleHPred = getSingleHierarchicalPredecessor()) &&
343         SingleHPred->getExitBasicBlock() == PrevVPBB &&
344         PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */
345       !(Replica && getPredecessors().empty())) {       /* C */
346     NewBB = createEmptyBasicBlock(State->CFG);
347     State->Builder.SetInsertPoint(NewBB);
348     // Temporarily terminate with unreachable until CFG is rewired.
349     UnreachableInst *Terminator = State->Builder.CreateUnreachable();
350     State->Builder.SetInsertPoint(Terminator);
351     // Register NewBB in its loop. In innermost loops its the same for all BB's.
352     Loop *L = State->LI->getLoopFor(State->CFG.LastBB);
353     L->addBasicBlockToLoop(NewBB, *State->LI);
354     State->CFG.PrevBB = NewBB;
355   }
356 
357   // 2. Fill the IR basic block with IR instructions.
358   LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
359                     << " in BB:" << NewBB->getName() << '\n');
360 
361   State->CFG.VPBB2IRBB[this] = NewBB;
362   State->CFG.PrevVPBB = this;
363 
364   for (VPRecipeBase &Recipe : Recipes)
365     Recipe.execute(*State);
366 
367   VPValue *CBV;
368   if (EnableVPlanNativePath && (CBV = getCondBit())) {
369     assert(CBV->getUnderlyingValue() &&
370            "Unexpected null underlying value for condition bit");
371 
372     // Condition bit value in a VPBasicBlock is used as the branch selector. In
373     // the VPlan-native path case, since all branches are uniform we generate a
374     // branch instruction using the condition value from vector lane 0 and dummy
375     // successors. The successors are fixed later when the successor blocks are
376     // visited.
377     Value *NewCond = State->get(CBV, {0, 0});
378 
379     // Replace the temporary unreachable terminator with the new conditional
380     // branch.
381     auto *CurrentTerminator = NewBB->getTerminator();
382     assert(isa<UnreachableInst>(CurrentTerminator) &&
383            "Expected to replace unreachable terminator with conditional "
384            "branch.");
385     auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond);
386     CondBr->setSuccessor(0, nullptr);
387     ReplaceInstWithInst(CurrentTerminator, CondBr);
388   }
389 
390   LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
391 }
392 
393 void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
394   for (VPRecipeBase &R : Recipes) {
395     for (auto *Def : R.definedValues())
396       Def->replaceAllUsesWith(NewValue);
397 
398     for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
399       R.setOperand(I, NewValue);
400   }
401 }
402 
403 VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
404   assert((SplitAt == end() || SplitAt->getParent() == this) &&
405          "can only split at a position in the same block");
406 
407   SmallVector<VPBlockBase *, 2> Succs(getSuccessors().begin(),
408                                       getSuccessors().end());
409   // First, disconnect the current block from its successors.
410   for (VPBlockBase *Succ : Succs)
411     VPBlockUtils::disconnectBlocks(this, Succ);
412 
413   // Create new empty block after the block to split.
414   auto *SplitBlock = new VPBasicBlock(getName() + ".split");
415   VPBlockUtils::insertBlockAfter(SplitBlock, this);
416 
417   // Add successors for block to split to new block.
418   for (VPBlockBase *Succ : Succs)
419     VPBlockUtils::connectBlocks(SplitBlock, Succ);
420 
421   // Finally, move the recipes starting at SplitAt to new block.
422   for (VPRecipeBase &ToMove :
423        make_early_inc_range(make_range(SplitAt, this->end())))
424     ToMove.moveBefore(*SplitBlock, SplitBlock->end());
425 
426   return SplitBlock;
427 }
428 
429 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
430 void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
431                          VPSlotTracker &SlotTracker) const {
432   O << Indent << getName() << ":\n";
433   if (const VPValue *Pred = getPredicate()) {
434     O << Indent << "BlockPredicate:";
435     Pred->printAsOperand(O, SlotTracker);
436     if (const auto *PredInst = dyn_cast<VPInstruction>(Pred))
437       O << " (" << PredInst->getParent()->getName() << ")";
438     O << '\n';
439   }
440 
441   auto RecipeIndent = Indent + "  ";
442   for (const VPRecipeBase &Recipe : *this) {
443     Recipe.print(O, RecipeIndent, SlotTracker);
444     O << '\n';
445   }
446 
447   if (getSuccessors().empty()) {
448     O << Indent << "No successors\n";
449   } else {
450     O << Indent << "Successor(s): ";
451     ListSeparator LS;
452     for (auto *Succ : getSuccessors())
453       O << LS << Succ->getName();
454     O << '\n';
455   }
456 
457   if (const VPValue *CBV = getCondBit()) {
458     O << Indent << "CondBit: ";
459     CBV->printAsOperand(O, SlotTracker);
460     if (const auto *CBI = dyn_cast<VPInstruction>(CBV))
461       O << " (" << CBI->getParent()->getName() << ")";
462     O << '\n';
463   }
464 }
465 #endif
466 
467 void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
468   for (VPBlockBase *Block : depth_first(Entry))
469     // Drop all references in VPBasicBlocks and replace all uses with
470     // DummyValue.
471     Block->dropAllReferences(NewValue);
472 }
473 
474 void VPRegionBlock::execute(VPTransformState *State) {
475   ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);
476 
477   if (!isReplicator()) {
478     // Visit the VPBlocks connected to "this", starting from it.
479     for (VPBlockBase *Block : RPOT) {
480       if (EnableVPlanNativePath) {
481         // The inner loop vectorization path does not represent loop preheader
482         // and exit blocks as part of the VPlan. In the VPlan-native path, skip
483         // vectorizing loop preheader block. In future, we may replace this
484         // check with the check for loop preheader.
485         if (Block->getNumPredecessors() == 0)
486           continue;
487 
488         // Skip vectorizing loop exit block. In future, we may replace this
489         // check with the check for loop exit.
490         if (Block->getNumSuccessors() == 0)
491           continue;
492       }
493 
494       LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
495       Block->execute(State);
496     }
497     return;
498   }
499 
500   assert(!State->Instance && "Replicating a Region with non-null instance.");
501 
502   // Enter replicating mode.
503   State->Instance = VPIteration(0, 0);
504 
505   for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
506     State->Instance->Part = Part;
507     assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
508     for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
509          ++Lane) {
510       State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
511       // Visit the VPBlocks connected to \p this, starting from it.
512       for (VPBlockBase *Block : RPOT) {
513         LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
514         Block->execute(State);
515       }
516     }
517   }
518 
519   // Exit replicating mode.
520   State->Instance.reset();
521 }
522 
523 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
524 void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
525                           VPSlotTracker &SlotTracker) const {
526   O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
527   auto NewIndent = Indent + "  ";
528   for (auto *BlockBase : depth_first(Entry)) {
529     O << '\n';
530     BlockBase->print(O, NewIndent, SlotTracker);
531   }
532   O << Indent << "}\n";
533 }
534 #endif
535 
536 bool VPRecipeBase::mayHaveSideEffects() const {
537   switch (getVPDefID()) {
538   case VPBranchOnMaskSC:
539     return false;
540   case VPBlendSC:
541   case VPWidenSC:
542   case VPWidenGEPSC:
543   case VPReductionSC:
544   case VPWidenSelectSC: {
545     const Instruction *I =
546         dyn_cast_or_null<Instruction>(getVPValue()->getUnderlyingValue());
547     (void)I;
548     assert((!I || !I->mayHaveSideEffects()) &&
549            "underlying instruction has side-effects");
550     return false;
551   }
552   case VPReplicateSC: {
553     auto *R = cast<VPReplicateRecipe>(this);
554     return R->getUnderlyingInstr()->mayHaveSideEffects();
555   }
556   default:
557     return true;
558   }
559 }
560 
561 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
562   assert(!Parent && "Recipe already in some VPBasicBlock");
563   assert(InsertPos->getParent() &&
564          "Insertion position not in any VPBasicBlock");
565   Parent = InsertPos->getParent();
566   Parent->getRecipeList().insert(InsertPos->getIterator(), this);
567 }
568 
569 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
570   assert(!Parent && "Recipe already in some VPBasicBlock");
571   assert(InsertPos->getParent() &&
572          "Insertion position not in any VPBasicBlock");
573   Parent = InsertPos->getParent();
574   Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
575 }
576 
577 void VPRecipeBase::removeFromParent() {
578   assert(getParent() && "Recipe not in any VPBasicBlock");
579   getParent()->getRecipeList().remove(getIterator());
580   Parent = nullptr;
581 }
582 
583 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
584   assert(getParent() && "Recipe not in any VPBasicBlock");
585   return getParent()->getRecipeList().erase(getIterator());
586 }
587 
588 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
589   removeFromParent();
590   insertAfter(InsertPos);
591 }
592 
593 void VPRecipeBase::moveBefore(VPBasicBlock &BB,
594                               iplist<VPRecipeBase>::iterator I) {
595   assert(I == BB.end() || I->getParent() == &BB);
596   removeFromParent();
597   Parent = &BB;
598   BB.getRecipeList().insert(I, this);
599 }
600 
601 void VPInstruction::generateInstruction(VPTransformState &State,
602                                         unsigned Part) {
603   IRBuilder<> &Builder = State.Builder;
604 
605   if (Instruction::isBinaryOp(getOpcode())) {
606     Value *A = State.get(getOperand(0), Part);
607     Value *B = State.get(getOperand(1), Part);
608     Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
609     State.set(this, V, Part);
610     return;
611   }
612 
613   switch (getOpcode()) {
614   case VPInstruction::Not: {
615     Value *A = State.get(getOperand(0), Part);
616     Value *V = Builder.CreateNot(A);
617     State.set(this, V, Part);
618     break;
619   }
620   case VPInstruction::ICmpULE: {
621     Value *IV = State.get(getOperand(0), Part);
622     Value *TC = State.get(getOperand(1), Part);
623     Value *V = Builder.CreateICmpULE(IV, TC);
624     State.set(this, V, Part);
625     break;
626   }
627   case Instruction::Select: {
628     Value *Cond = State.get(getOperand(0), Part);
629     Value *Op1 = State.get(getOperand(1), Part);
630     Value *Op2 = State.get(getOperand(2), Part);
631     Value *V = Builder.CreateSelect(Cond, Op1, Op2);
632     State.set(this, V, Part);
633     break;
634   }
635   case VPInstruction::ActiveLaneMask: {
636     // Get first lane of vector induction variable.
637     Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
638     // Get the original loop tripcount.
639     Value *ScalarTC = State.TripCount;
640 
641     auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
642     auto *PredTy = FixedVectorType::get(Int1Ty, State.VF.getKnownMinValue());
643     Instruction *Call = Builder.CreateIntrinsic(
644         Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
645         {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
646     State.set(this, Call, Part);
647     break;
648   }
649   default:
650     llvm_unreachable("Unsupported opcode for instruction");
651   }
652 }
653 
654 void VPInstruction::execute(VPTransformState &State) {
655   assert(!State.Instance && "VPInstruction executing an Instance");
656   for (unsigned Part = 0; Part < State.UF; ++Part)
657     generateInstruction(State, Part);
658 }
659 
660 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
661 void VPInstruction::dump() const {
662   VPSlotTracker SlotTracker(getParent()->getPlan());
663   print(dbgs(), "", SlotTracker);
664 }
665 
666 void VPInstruction::print(raw_ostream &O, const Twine &Indent,
667                           VPSlotTracker &SlotTracker) const {
668   O << Indent << "EMIT ";
669 
670   if (hasResult()) {
671     printAsOperand(O, SlotTracker);
672     O << " = ";
673   }
674 
675   switch (getOpcode()) {
676   case VPInstruction::Not:
677     O << "not";
678     break;
679   case VPInstruction::ICmpULE:
680     O << "icmp ule";
681     break;
682   case VPInstruction::SLPLoad:
683     O << "combined load";
684     break;
685   case VPInstruction::SLPStore:
686     O << "combined store";
687     break;
688   case VPInstruction::ActiveLaneMask:
689     O << "active lane mask";
690     break;
691 
692   default:
693     O << Instruction::getOpcodeName(getOpcode());
694   }
695 
696   for (const VPValue *Operand : operands()) {
697     O << " ";
698     Operand->printAsOperand(O, SlotTracker);
699   }
700 }
701 #endif
702 
703 /// Generate the code inside the body of the vectorized loop. Assumes a single
704 /// LoopVectorBody basic-block was created for this. Introduce additional
705 /// basic-blocks as needed, and fill them all.
706 void VPlan::execute(VPTransformState *State) {
707   // -1. Check if the backedge taken count is needed, and if so build it.
708   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
709     Value *TC = State->TripCount;
710     IRBuilder<> Builder(State->CFG.PrevBB->getTerminator());
711     auto *TCMO = Builder.CreateSub(TC, ConstantInt::get(TC->getType(), 1),
712                                    "trip.count.minus.1");
713     auto VF = State->VF;
714     Value *VTCMO =
715         VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
716     for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part)
717       State->set(BackedgeTakenCount, VTCMO, Part);
718   }
719 
720   // 0. Set the reverse mapping from VPValues to Values for code generation.
721   for (auto &Entry : Value2VPValue)
722     State->VPValue2Value[Entry.second] = Entry.first;
723 
724   BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
725   BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
726   assert(VectorHeaderBB && "Loop preheader does not have a single successor.");
727 
728   // 1. Make room to generate basic-blocks inside loop body if needed.
729   BasicBlock *VectorLatchBB = VectorHeaderBB->splitBasicBlock(
730       VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch");
731   Loop *L = State->LI->getLoopFor(VectorHeaderBB);
732   L->addBasicBlockToLoop(VectorLatchBB, *State->LI);
733   // Remove the edge between Header and Latch to allow other connections.
734   // Temporarily terminate with unreachable until CFG is rewired.
735   // Note: this asserts the generated code's assumption that
736   // getFirstInsertionPt() can be dereferenced into an Instruction.
737   VectorHeaderBB->getTerminator()->eraseFromParent();
738   State->Builder.SetInsertPoint(VectorHeaderBB);
739   UnreachableInst *Terminator = State->Builder.CreateUnreachable();
740   State->Builder.SetInsertPoint(Terminator);
741 
742   // 2. Generate code in loop body.
743   State->CFG.PrevVPBB = nullptr;
744   State->CFG.PrevBB = VectorHeaderBB;
745   State->CFG.LastBB = VectorLatchBB;
746 
747   for (VPBlockBase *Block : depth_first(Entry))
748     Block->execute(State);
749 
750   // Setup branch terminator successors for VPBBs in VPBBsToFix based on
751   // VPBB's successors.
752   for (auto VPBB : State->CFG.VPBBsToFix) {
753     assert(EnableVPlanNativePath &&
754            "Unexpected VPBBsToFix in non VPlan-native path");
755     BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
756     assert(BB && "Unexpected null basic block for VPBB");
757 
758     unsigned Idx = 0;
759     auto *BBTerminator = BB->getTerminator();
760 
761     for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) {
762       VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock();
763       BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]);
764       ++Idx;
765     }
766   }
767 
768   // 3. Merge the temporary latch created with the last basic-block filled.
769   BasicBlock *LastBB = State->CFG.PrevBB;
770   // Connect LastBB to VectorLatchBB to facilitate their merge.
771   assert((EnableVPlanNativePath ||
772           isa<UnreachableInst>(LastBB->getTerminator())) &&
773          "Expected InnerLoop VPlan CFG to terminate with unreachable");
774   assert((!EnableVPlanNativePath || isa<BranchInst>(LastBB->getTerminator())) &&
775          "Expected VPlan CFG to terminate with branch in NativePath");
776   LastBB->getTerminator()->eraseFromParent();
777   BranchInst::Create(VectorLatchBB, LastBB);
778 
779   // Merge LastBB with Latch.
780   bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI);
781   (void)Merged;
782   assert(Merged && "Could not merge last basic block with latch.");
783   VectorLatchBB = LastBB;
784 
785   // We do not attempt to preserve DT for outer loop vectorization currently.
786   if (!EnableVPlanNativePath)
787     updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB,
788                         L->getExitBlock());
789 }
790 
791 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
792 LLVM_DUMP_METHOD
793 void VPlan::print(raw_ostream &O) const {
794   VPSlotTracker SlotTracker(this);
795 
796   O << "VPlan '" << Name << "' {";
797   for (const VPBlockBase *Block : depth_first(getEntry())) {
798     O << '\n';
799     Block->print(O, "", SlotTracker);
800   }
801   O << "}\n";
802 }
803 
804 LLVM_DUMP_METHOD
805 void VPlan::printDOT(raw_ostream &O) const {
806   VPlanPrinter Printer(O, *this);
807   Printer.dump();
808 }
809 
810 LLVM_DUMP_METHOD
811 void VPlan::dump() const { print(dbgs()); }
812 #endif
813 
814 void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
815                                 BasicBlock *LoopLatchBB,
816                                 BasicBlock *LoopExitBB) {
817   BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor();
818   assert(LoopHeaderBB && "Loop preheader does not have a single successor.");
819   // The vector body may be more than a single basic-block by this point.
820   // Update the dominator tree information inside the vector body by propagating
821   // it from header to latch, expecting only triangular control-flow, if any.
822   BasicBlock *PostDomSucc = nullptr;
823   for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
824     // Get the list of successors of this block.
825     std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
826     assert(Succs.size() <= 2 &&
827            "Basic block in vector loop has more than 2 successors.");
828     PostDomSucc = Succs[0];
829     if (Succs.size() == 1) {
830       assert(PostDomSucc->getSinglePredecessor() &&
831              "PostDom successor has more than one predecessor.");
832       DT->addNewBlock(PostDomSucc, BB);
833       continue;
834     }
835     BasicBlock *InterimSucc = Succs[1];
836     if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
837       PostDomSucc = Succs[1];
838       InterimSucc = Succs[0];
839     }
840     assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
841            "One successor of a basic block does not lead to the other.");
842     assert(InterimSucc->getSinglePredecessor() &&
843            "Interim successor has more than one predecessor.");
844     assert(PostDomSucc->hasNPredecessors(2) &&
845            "PostDom successor has more than two predecessors.");
846     DT->addNewBlock(InterimSucc, BB);
847     DT->addNewBlock(PostDomSucc, BB);
848   }
849   // Latch block is a new dominator for the loop exit.
850   DT->changeImmediateDominator(LoopExitBB, LoopLatchBB);
851   assert(DT->verify(DominatorTree::VerificationLevel::Fast));
852 }
853 
854 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
855 const Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
856   return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
857          Twine(getOrCreateBID(Block));
858 }
859 
860 const Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
861   const std::string &Name = Block->getName();
862   if (!Name.empty())
863     return Name;
864   return "VPB" + Twine(getOrCreateBID(Block));
865 }
866 
867 void VPlanPrinter::dump() {
868   Depth = 1;
869   bumpIndent(0);
870   OS << "digraph VPlan {\n";
871   OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
872   if (!Plan.getName().empty())
873     OS << "\\n" << DOT::EscapeString(Plan.getName());
874   if (Plan.BackedgeTakenCount) {
875     OS << ", where:\\n";
876     Plan.BackedgeTakenCount->print(OS, SlotTracker);
877     OS << " := BackedgeTakenCount";
878   }
879   OS << "\"]\n";
880   OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
881   OS << "edge [fontname=Courier, fontsize=30]\n";
882   OS << "compound=true\n";
883 
884   for (const VPBlockBase *Block : depth_first(Plan.getEntry()))
885     dumpBlock(Block);
886 
887   OS << "}\n";
888 }
889 
890 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
891   if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
892     dumpBasicBlock(BasicBlock);
893   else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
894     dumpRegion(Region);
895   else
896     llvm_unreachable("Unsupported kind of VPBlock.");
897 }
898 
899 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
900                             bool Hidden, const Twine &Label) {
901   // Due to "dot" we print an edge between two regions as an edge between the
902   // exit basic block and the entry basic of the respective regions.
903   const VPBlockBase *Tail = From->getExitBasicBlock();
904   const VPBlockBase *Head = To->getEntryBasicBlock();
905   OS << Indent << getUID(Tail) << " -> " << getUID(Head);
906   OS << " [ label=\"" << Label << '\"';
907   if (Tail != From)
908     OS << " ltail=" << getUID(From);
909   if (Head != To)
910     OS << " lhead=" << getUID(To);
911   if (Hidden)
912     OS << "; splines=none";
913   OS << "]\n";
914 }
915 
916 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
917   auto &Successors = Block->getSuccessors();
918   if (Successors.size() == 1)
919     drawEdge(Block, Successors.front(), false, "");
920   else if (Successors.size() == 2) {
921     drawEdge(Block, Successors.front(), false, "T");
922     drawEdge(Block, Successors.back(), false, "F");
923   } else {
924     unsigned SuccessorNumber = 0;
925     for (auto *Successor : Successors)
926       drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
927   }
928 }
929 
930 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
931   // Implement dot-formatted dump by performing plain-text dump into the
932   // temporary storage followed by some post-processing.
933   OS << Indent << getUID(BasicBlock) << " [label =\n";
934   bumpIndent(1);
935   std::string Str;
936   raw_string_ostream SS(Str);
937   // Use no indentation as we need to wrap the lines into quotes ourselves.
938   BasicBlock->print(SS, "", SlotTracker);
939 
940   // We need to process each line of the output separately, so split
941   // single-string plain-text dump.
942   SmallVector<StringRef, 0> Lines;
943   StringRef(Str).rtrim('\n').split(Lines, "\n");
944 
945   auto EmitLine = [&](StringRef Line, StringRef Suffix) {
946     OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
947   };
948 
949   // Don't need the "+" after the last line.
950   for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
951     EmitLine(Line, " +\n");
952   EmitLine(Lines.back(), "\n");
953 
954   bumpIndent(-1);
955   OS << Indent << "]\n";
956 
957   dumpEdges(BasicBlock);
958 }
959 
960 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
961   OS << Indent << "subgraph " << getUID(Region) << " {\n";
962   bumpIndent(1);
963   OS << Indent << "fontname=Courier\n"
964      << Indent << "label=\""
965      << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
966      << DOT::EscapeString(Region->getName()) << "\"\n";
967   // Dump the blocks of the region.
968   assert(Region->getEntry() && "Region contains no inner blocks.");
969   for (const VPBlockBase *Block : depth_first(Region->getEntry()))
970     dumpBlock(Block);
971   bumpIndent(-1);
972   OS << Indent << "}\n";
973   dumpEdges(Region);
974 }
975 
976 void VPlanIngredient::print(raw_ostream &O) const {
977   if (auto *Inst = dyn_cast<Instruction>(V)) {
978     if (!Inst->getType()->isVoidTy()) {
979       Inst->printAsOperand(O, false);
980       O << " = ";
981     }
982     O << Inst->getOpcodeName() << " ";
983     unsigned E = Inst->getNumOperands();
984     if (E > 0) {
985       Inst->getOperand(0)->printAsOperand(O, false);
986       for (unsigned I = 1; I < E; ++I)
987         Inst->getOperand(I)->printAsOperand(O << ", ", false);
988     }
989   } else // !Inst
990     V->printAsOperand(O, false);
991 }
992 
993 void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
994                               VPSlotTracker &SlotTracker) const {
995   O << Indent << "WIDEN-CALL ";
996 
997   auto *CI = cast<CallInst>(getUnderlyingInstr());
998   if (CI->getType()->isVoidTy())
999     O << "void ";
1000   else {
1001     printAsOperand(O, SlotTracker);
1002     O << " = ";
1003   }
1004 
1005   O << "call @" << CI->getCalledFunction()->getName() << "(";
1006   printOperands(O, SlotTracker);
1007   O << ")";
1008 }
1009 
1010 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
1011                                 VPSlotTracker &SlotTracker) const {
1012   O << Indent << "WIDEN-SELECT ";
1013   printAsOperand(O, SlotTracker);
1014   O << " = select ";
1015   getOperand(0)->printAsOperand(O, SlotTracker);
1016   O << ", ";
1017   getOperand(1)->printAsOperand(O, SlotTracker);
1018   O << ", ";
1019   getOperand(2)->printAsOperand(O, SlotTracker);
1020   O << (InvariantCond ? " (condition is loop invariant)" : "");
1021 }
1022 
1023 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
1024                           VPSlotTracker &SlotTracker) const {
1025   O << Indent << "WIDEN ";
1026   printAsOperand(O, SlotTracker);
1027   O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
1028   printOperands(O, SlotTracker);
1029 }
1030 
1031 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
1032                                           VPSlotTracker &SlotTracker) const {
1033   O << Indent << "WIDEN-INDUCTION";
1034   if (getTruncInst()) {
1035     O << "\\l\"";
1036     O << " +\n" << Indent << "\"  " << VPlanIngredient(IV) << "\\l\"";
1037     O << " +\n" << Indent << "\"  ";
1038     getVPValue(0)->printAsOperand(O, SlotTracker);
1039   } else
1040     O << " " << VPlanIngredient(IV);
1041 }
1042 
1043 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
1044                              VPSlotTracker &SlotTracker) const {
1045   O << Indent << "WIDEN-GEP ";
1046   O << (IsPtrLoopInvariant ? "Inv" : "Var");
1047   size_t IndicesNumber = IsIndexLoopInvariant.size();
1048   for (size_t I = 0; I < IndicesNumber; ++I)
1049     O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
1050 
1051   O << " ";
1052   printAsOperand(O, SlotTracker);
1053   O << " = getelementptr ";
1054   printOperands(O, SlotTracker);
1055 }
1056 
1057 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1058                              VPSlotTracker &SlotTracker) const {
1059   O << Indent << "WIDEN-PHI ";
1060 
1061   auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
1062   // Unless all incoming values are modeled in VPlan  print the original PHI
1063   // directly.
1064   // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
1065   // values as VPValues.
1066   if (getNumOperands() != OriginalPhi->getNumOperands()) {
1067     O << VPlanIngredient(OriginalPhi);
1068     return;
1069   }
1070 
1071   printAsOperand(O, SlotTracker);
1072   O << " = phi ";
1073   printOperands(O, SlotTracker);
1074 }
1075 
1076 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
1077                           VPSlotTracker &SlotTracker) const {
1078   O << Indent << "BLEND ";
1079   Phi->printAsOperand(O, false);
1080   O << " =";
1081   if (getNumIncomingValues() == 1) {
1082     // Not a User of any mask: not really blending, this is a
1083     // single-predecessor phi.
1084     O << " ";
1085     getIncomingValue(0)->printAsOperand(O, SlotTracker);
1086   } else {
1087     for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
1088       O << " ";
1089       getIncomingValue(I)->printAsOperand(O, SlotTracker);
1090       O << "/";
1091       getMask(I)->printAsOperand(O, SlotTracker);
1092     }
1093   }
1094 }
1095 
1096 void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
1097                               VPSlotTracker &SlotTracker) const {
1098   O << Indent << "REDUCE ";
1099   printAsOperand(O, SlotTracker);
1100   O << " = ";
1101   getChainOp()->printAsOperand(O, SlotTracker);
1102   O << " + reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode())
1103     << " (";
1104   getVecOp()->printAsOperand(O, SlotTracker);
1105   if (getCondOp()) {
1106     O << ", ";
1107     getCondOp()->printAsOperand(O, SlotTracker);
1108   }
1109   O << ")";
1110 }
1111 
1112 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
1113                               VPSlotTracker &SlotTracker) const {
1114   O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
1115 
1116   if (!getUnderlyingInstr()->getType()->isVoidTy()) {
1117     printAsOperand(O, SlotTracker);
1118     O << " = ";
1119   }
1120   O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
1121   printOperands(O, SlotTracker);
1122 
1123   if (AlsoPack)
1124     O << " (S->V)";
1125 }
1126 
1127 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1128                                 VPSlotTracker &SlotTracker) const {
1129   O << Indent << "PHI-PREDICATED-INSTRUCTION ";
1130   printAsOperand(O, SlotTracker);
1131   O << " = ";
1132   printOperands(O, SlotTracker);
1133 }
1134 
1135 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
1136                                            VPSlotTracker &SlotTracker) const {
1137   O << Indent << "WIDEN ";
1138 
1139   if (!isStore()) {
1140     getVPValue()->printAsOperand(O, SlotTracker);
1141     O << " = ";
1142   }
1143   O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
1144 
1145   printOperands(O, SlotTracker);
1146 }
1147 #endif
1148 
1149 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
1150   Value *CanonicalIV = State.CanonicalIV;
1151   Type *STy = CanonicalIV->getType();
1152   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
1153   ElementCount VF = State.VF;
1154   assert(!VF.isScalable() && "the code following assumes non scalables ECs");
1155   Value *VStart = VF.isScalar()
1156                       ? CanonicalIV
1157                       : Builder.CreateVectorSplat(VF.getKnownMinValue(),
1158                                                   CanonicalIV, "broadcast");
1159   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1160     SmallVector<Constant *, 8> Indices;
1161     for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
1162       Indices.push_back(
1163           ConstantInt::get(STy, Part * VF.getKnownMinValue() + Lane));
1164     // If VF == 1, there is only one iteration in the loop above, thus the
1165     // element pushed back into Indices is ConstantInt::get(STy, Part)
1166     Constant *VStep =
1167         VF.isScalar() ? Indices.back() : ConstantVector::get(Indices);
1168     // Add the consecutive indices to the vector value.
1169     Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
1170     State.set(getVPSingleValue(), CanonicalVectorIV, Part);
1171   }
1172 }
1173 
1174 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1175 void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
1176                                      VPSlotTracker &SlotTracker) const {
1177   O << Indent << "EMIT ";
1178   getVPValue()->printAsOperand(O, SlotTracker);
1179   O << " = WIDEN-CANONICAL-INDUCTION";
1180 }
1181 #endif
1182 
1183 template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
1184 
1185 void VPValue::replaceAllUsesWith(VPValue *New) {
1186   for (unsigned J = 0; J < getNumUsers();) {
1187     VPUser *User = Users[J];
1188     unsigned NumUsers = getNumUsers();
1189     for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
1190       if (User->getOperand(I) == this)
1191         User->setOperand(I, New);
1192     // If a user got removed after updating the current user, the next user to
1193     // update will be moved to the current position, so we only need to
1194     // increment the index if the number of users did not change.
1195     if (NumUsers == getNumUsers())
1196       J++;
1197   }
1198 }
1199 
1200 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1201 void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
1202   if (const Value *UV = getUnderlyingValue()) {
1203     OS << "ir<";
1204     UV->printAsOperand(OS, false);
1205     OS << ">";
1206     return;
1207   }
1208 
1209   unsigned Slot = Tracker.getSlot(this);
1210   if (Slot == unsigned(-1))
1211     OS << "<badref>";
1212   else
1213     OS << "vp<%" << Tracker.getSlot(this) << ">";
1214 }
1215 
1216 void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
1217   interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1218     Op->printAsOperand(O, SlotTracker);
1219   });
1220 }
1221 #endif
1222 
1223 void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
1224                                           Old2NewTy &Old2New,
1225                                           InterleavedAccessInfo &IAI) {
1226   ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
1227   for (VPBlockBase *Base : RPOT) {
1228     visitBlock(Base, Old2New, IAI);
1229   }
1230 }
1231 
1232 void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1233                                          InterleavedAccessInfo &IAI) {
1234   if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
1235     for (VPRecipeBase &VPI : *VPBB) {
1236       if (isa<VPWidenPHIRecipe>(&VPI))
1237         continue;
1238       assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
1239       auto *VPInst = cast<VPInstruction>(&VPI);
1240       auto *Inst = cast<Instruction>(VPInst->getUnderlyingValue());
1241       auto *IG = IAI.getInterleaveGroup(Inst);
1242       if (!IG)
1243         continue;
1244 
1245       auto NewIGIter = Old2New.find(IG);
1246       if (NewIGIter == Old2New.end())
1247         Old2New[IG] = new InterleaveGroup<VPInstruction>(
1248             IG->getFactor(), IG->isReverse(), IG->getAlign());
1249 
1250       if (Inst == IG->getInsertPos())
1251         Old2New[IG]->setInsertPos(VPInst);
1252 
1253       InterleaveGroupMap[VPInst] = Old2New[IG];
1254       InterleaveGroupMap[VPInst]->insertMember(
1255           VPInst, IG->getIndex(Inst),
1256           Align(IG->isReverse() ? (-1) * int(IG->getFactor())
1257                                 : IG->getFactor()));
1258     }
1259   } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1260     visitRegion(Region, Old2New, IAI);
1261   else
1262     llvm_unreachable("Unsupported kind of VPBlock.");
1263 }
1264 
1265 VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
1266                                                  InterleavedAccessInfo &IAI) {
1267   Old2NewTy Old2New;
1268   visitRegion(cast<VPRegionBlock>(Plan.getEntry()), Old2New, IAI);
1269 }
1270 
1271 void VPSlotTracker::assignSlot(const VPValue *V) {
1272   assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!");
1273   Slots[V] = NextSlot++;
1274 }
1275 
1276 void VPSlotTracker::assignSlots(const VPlan &Plan) {
1277 
1278   for (const VPValue *V : Plan.VPExternalDefs)
1279     assignSlot(V);
1280 
1281   if (Plan.BackedgeTakenCount)
1282     assignSlot(Plan.BackedgeTakenCount);
1283 
1284   ReversePostOrderTraversal<
1285       VPBlockRecursiveTraversalWrapper<const VPBlockBase *>>
1286       RPOT(VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(
1287           Plan.getEntry()));
1288   for (const VPBasicBlock *VPBB :
1289        VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
1290     for (const VPRecipeBase &Recipe : *VPBB)
1291       for (VPValue *Def : Recipe.definedValues())
1292         assignSlot(Def);
1293 }
1294