xref: /llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp (revision eaf48dd9b079c005d92ed9ef858f12bc452e71ef)
1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This is the LLVM vectorization plan. It represents a candidate for
11 /// vectorization, allowing to plan and optimize how to vectorize a given loop
12 /// before generating LLVM-IR.
13 /// The vectorizer uses vectorization plans to estimate the costs of potential
14 /// candidates and if profitable to execute the desired plan, generating vector
15 /// LLVM-IR code.
16 ///
17 //===----------------------------------------------------------------------===//
18 
19 #include "VPlan.h"
20 #include "VPlanDominatorTree.h"
21 #include "llvm/ADT/DepthFirstIterator.h"
22 #include "llvm/ADT/PostOrderIterator.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Twine.h"
26 #include "llvm/Analysis/IVDescriptors.h"
27 #include "llvm/Analysis/LoopInfo.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/CFG.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/GenericDomTreeConstruction.h"
40 #include "llvm/Support/GraphWriter.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
43 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
44 #include <cassert>
45 #include <string>
46 #include <vector>
47 
48 using namespace llvm;
49 extern cl::opt<bool> EnableVPlanNativePath;
50 
51 #define DEBUG_TYPE "vplan"
52 
53 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
54 raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
55   const VPInstruction *Instr = dyn_cast<VPInstruction>(&V);
56   VPSlotTracker SlotTracker(
57       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
58   V.print(OS, SlotTracker);
59   return OS;
60 }
61 #endif
62 
63 Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder,
64                                 const ElementCount &VF) const {
65   switch (LaneKind) {
66   case VPLane::Kind::ScalableLast:
67     // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
68     return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
69                              Builder.getInt32(VF.getKnownMinValue() - Lane));
70   case VPLane::Kind::First:
71     return Builder.getInt32(Lane);
72   }
73   llvm_unreachable("Unknown lane kind");
74 }
75 
76 VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
77     : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
78   if (Def)
79     Def->addDefinedValue(this);
80 }
81 
82 VPValue::~VPValue() {
83   assert(Users.empty() && "trying to delete a VPValue with remaining users");
84   if (Def)
85     Def->removeDefinedValue(this);
86 }
87 
88 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
89 void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
90   if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
91     R->print(OS, "", SlotTracker);
92   else
93     printAsOperand(OS, SlotTracker);
94 }
95 
96 void VPValue::dump() const {
97   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
98   VPSlotTracker SlotTracker(
99       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
100   print(dbgs(), SlotTracker);
101   dbgs() << "\n";
102 }
103 
104 void VPDef::dump() const {
105   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
106   VPSlotTracker SlotTracker(
107       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
108   print(dbgs(), "", SlotTracker);
109   dbgs() << "\n";
110 }
111 #endif
112 
113 // Get the top-most entry block of \p Start. This is the entry block of the
114 // containing VPlan. This function is templated to support both const and non-const blocks
115 template <typename T> static T *getPlanEntry(T *Start) {
116   T *Next = Start;
117   T *Current = Start;
118   while ((Next = Next->getParent()))
119     Current = Next;
120 
121   SmallSetVector<T *, 8> WorkList;
122   WorkList.insert(Current);
123 
124   for (unsigned i = 0; i < WorkList.size(); i++) {
125     T *Current = WorkList[i];
126     if (Current->getNumPredecessors() == 0)
127       return Current;
128     auto &Predecessors = Current->getPredecessors();
129     WorkList.insert(Predecessors.begin(), Predecessors.end());
130   }
131 
132   llvm_unreachable("VPlan without any entry node without predecessors");
133 }
134 
135 VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
136 
137 const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
138 
139 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
140 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
141   const VPBlockBase *Block = this;
142   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
143     Block = Region->getEntry();
144   return cast<VPBasicBlock>(Block);
145 }
146 
147 VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
148   VPBlockBase *Block = this;
149   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
150     Block = Region->getEntry();
151   return cast<VPBasicBlock>(Block);
152 }
153 
154 void VPBlockBase::setPlan(VPlan *ParentPlan) {
155   assert(ParentPlan->getEntry() == this &&
156          "Can only set plan on its entry block.");
157   Plan = ParentPlan;
158 }
159 
160 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
161 const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const {
162   const VPBlockBase *Block = this;
163   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
164     Block = Region->getExiting();
165   return cast<VPBasicBlock>(Block);
166 }
167 
168 VPBasicBlock *VPBlockBase::getExitingBasicBlock() {
169   VPBlockBase *Block = this;
170   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
171     Block = Region->getExiting();
172   return cast<VPBasicBlock>(Block);
173 }
174 
175 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
176   if (!Successors.empty() || !Parent)
177     return this;
178   assert(Parent->getExiting() == this &&
179          "Block w/o successors not the exiting block of its parent.");
180   return Parent->getEnclosingBlockWithSuccessors();
181 }
182 
183 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
184   if (!Predecessors.empty() || !Parent)
185     return this;
186   assert(Parent->getEntry() == this &&
187          "Block w/o predecessors not the entry of its parent.");
188   return Parent->getEnclosingBlockWithPredecessors();
189 }
190 
191 void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
192   SmallVector<VPBlockBase *, 8> Blocks(depth_first(Entry));
193 
194   for (VPBlockBase *Block : Blocks)
195     delete Block;
196 }
197 
198 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
199   iterator It = begin();
200   while (It != end() && It->isPhi())
201     It++;
202   return It;
203 }
204 
205 Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
206   if (!Def->getDef())
207     return Def->getLiveInIRValue();
208 
209   if (hasScalarValue(Def, Instance)) {
210     return Data
211         .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)];
212   }
213 
214   assert(hasVectorValue(Def, Instance.Part));
215   auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
216   if (!VecPart->getType()->isVectorTy()) {
217     assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
218     return VecPart;
219   }
220   // TODO: Cache created scalar values.
221   Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
222   auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
223   // set(Def, Extract, Instance);
224   return Extract;
225 }
226 BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
227   VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
228   return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
229 }
230 
231 BasicBlock *
232 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
233   // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
234   // Pred stands for Predessor. Prev stands for Previous - last visited/created.
235   BasicBlock *PrevBB = CFG.PrevBB;
236   BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
237                                          PrevBB->getParent(), CFG.ExitBB);
238   LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
239 
240   // Hook up the new basic block to its predecessors.
241   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
242     VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
243     auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
244     BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
245 
246     assert(PredBB && "Predecessor basic-block not found building successor.");
247     auto *PredBBTerminator = PredBB->getTerminator();
248     LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
249 
250     auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
251     if (isa<UnreachableInst>(PredBBTerminator)) {
252       assert(PredVPSuccessors.size() == 1 &&
253              "Predecessor ending w/o branch must have single successor.");
254       DebugLoc DL = PredBBTerminator->getDebugLoc();
255       PredBBTerminator->eraseFromParent();
256       auto *Br = BranchInst::Create(NewBB, PredBB);
257       Br->setDebugLoc(DL);
258     } else if (TermBr && !TermBr->isConditional()) {
259       TermBr->setSuccessor(0, NewBB);
260     } else {
261       // Set each forward successor here when it is created, excluding
262       // backedges. A backward successor is set when the branch is created.
263       unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
264       assert(!TermBr->getSuccessor(idx) &&
265              "Trying to reset an existing successor block.");
266       TermBr->setSuccessor(idx, NewBB);
267     }
268   }
269   return NewBB;
270 }
271 
272 void VPBasicBlock::execute(VPTransformState *State) {
273   bool Replica = State->Instance && !State->Instance->isFirstIteration();
274   VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
275   VPBlockBase *SingleHPred = nullptr;
276   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
277 
278   auto IsLoopRegion = [](VPBlockBase *BB) {
279     auto *R = dyn_cast<VPRegionBlock>(BB);
280     return R && !R->isReplicator();
281   };
282 
283   // 1. Create an IR basic block, or reuse the last one or ExitBB if possible.
284   if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) {
285     // ExitBB can be re-used for the exit block of the Plan.
286     NewBB = State->CFG.ExitBB;
287     State->CFG.PrevBB = NewBB;
288 
289     // Update the branch instruction in the predecessor to branch to ExitBB.
290     VPBlockBase *PredVPB = getSingleHierarchicalPredecessor();
291     VPBasicBlock *ExitingVPBB = PredVPB->getExitingBasicBlock();
292     assert(PredVPB->getSingleSuccessor() == this &&
293            "predecessor must have the current block as only successor");
294     BasicBlock *ExitingBB = State->CFG.VPBB2IRBB[ExitingVPBB];
295     // The Exit block of a loop is always set to be successor 0 of the Exiting
296     // block.
297     cast<BranchInst>(ExitingBB->getTerminator())->setSuccessor(0, NewBB);
298   } else if (PrevVPBB && /* A */
299              !((SingleHPred = getSingleHierarchicalPredecessor()) &&
300                SingleHPred->getExitingBasicBlock() == PrevVPBB &&
301                PrevVPBB->getSingleHierarchicalSuccessor() &&
302                (SingleHPred->getParent() == getEnclosingLoopRegion() &&
303                 !IsLoopRegion(SingleHPred))) &&         /* B */
304              !(Replica && getPredecessors().empty())) { /* C */
305     // The last IR basic block is reused, as an optimization, in three cases:
306     // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
307     // B. when the current VPBB has a single (hierarchical) predecessor which
308     //    is PrevVPBB and the latter has a single (hierarchical) successor which
309     //    both are in the same non-replicator region; and
310     // C. when the current VPBB is an entry of a region replica - where PrevVPBB
311     //    is the exiting VPBB of this region from a previous instance, or the
312     //    predecessor of this region.
313 
314     NewBB = createEmptyBasicBlock(State->CFG);
315     State->Builder.SetInsertPoint(NewBB);
316     // Temporarily terminate with unreachable until CFG is rewired.
317     UnreachableInst *Terminator = State->Builder.CreateUnreachable();
318     // Register NewBB in its loop. In innermost loops its the same for all
319     // BB's.
320     if (State->CurrentVectorLoop)
321       State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
322     State->Builder.SetInsertPoint(Terminator);
323     State->CFG.PrevBB = NewBB;
324   }
325 
326   // 2. Fill the IR basic block with IR instructions.
327   LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
328                     << " in BB:" << NewBB->getName() << '\n');
329 
330   State->CFG.VPBB2IRBB[this] = NewBB;
331   State->CFG.PrevVPBB = this;
332 
333   for (VPRecipeBase &Recipe : Recipes)
334     Recipe.execute(*State);
335 
336   LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
337 }
338 
339 void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
340   for (VPRecipeBase &R : Recipes) {
341     for (auto *Def : R.definedValues())
342       Def->replaceAllUsesWith(NewValue);
343 
344     for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
345       R.setOperand(I, NewValue);
346   }
347 }
348 
349 VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
350   assert((SplitAt == end() || SplitAt->getParent() == this) &&
351          "can only split at a position in the same block");
352 
353   SmallVector<VPBlockBase *, 2> Succs(successors());
354   // First, disconnect the current block from its successors.
355   for (VPBlockBase *Succ : Succs)
356     VPBlockUtils::disconnectBlocks(this, Succ);
357 
358   // Create new empty block after the block to split.
359   auto *SplitBlock = new VPBasicBlock(getName() + ".split");
360   VPBlockUtils::insertBlockAfter(SplitBlock, this);
361 
362   // Add successors for block to split to new block.
363   for (VPBlockBase *Succ : Succs)
364     VPBlockUtils::connectBlocks(SplitBlock, Succ);
365 
366   // Finally, move the recipes starting at SplitAt to new block.
367   for (VPRecipeBase &ToMove :
368        make_early_inc_range(make_range(SplitAt, this->end())))
369     ToMove.moveBefore(*SplitBlock, SplitBlock->end());
370 
371   return SplitBlock;
372 }
373 
374 VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
375   VPRegionBlock *P = getParent();
376   if (P && P->isReplicator()) {
377     P = P->getParent();
378     assert(!cast<VPRegionBlock>(P)->isReplicator() &&
379            "unexpected nested replicate regions");
380   }
381   return P;
382 }
383 
384 static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
385   if (VPBB->empty()) {
386     assert(
387         VPBB->getNumSuccessors() < 2 &&
388         "block with multiple successors doesn't have a recipe as terminator");
389     return false;
390   }
391 
392   const VPRecipeBase *R = &VPBB->back();
393   auto *VPI = dyn_cast<VPInstruction>(R);
394   bool IsCondBranch =
395       isa<VPBranchOnMaskRecipe>(R) ||
396       (VPI && (VPI->getOpcode() == VPInstruction::BranchOnCond ||
397                VPI->getOpcode() == VPInstruction::BranchOnCount));
398   (void)IsCondBranch;
399 
400   if (VPBB->getNumSuccessors() >= 2 || VPBB->isExiting()) {
401     assert(IsCondBranch && "block with multiple successors not terminated by "
402                            "conditional branch recipe");
403 
404     return true;
405   }
406 
407   assert(
408       !IsCondBranch &&
409       "block with 0 or 1 successors terminated by conditional branch recipe");
410   return false;
411 }
412 
413 VPRecipeBase *VPBasicBlock::getTerminator() {
414   if (hasConditionalTerminator(this))
415     return &back();
416   return nullptr;
417 }
418 
419 const VPRecipeBase *VPBasicBlock::getTerminator() const {
420   if (hasConditionalTerminator(this))
421     return &back();
422   return nullptr;
423 }
424 
425 bool VPBasicBlock::isExiting() const {
426   return getParent()->getExitingBasicBlock() == this;
427 }
428 
429 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
430 void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
431   if (getSuccessors().empty()) {
432     O << Indent << "No successors\n";
433   } else {
434     O << Indent << "Successor(s): ";
435     ListSeparator LS;
436     for (auto *Succ : getSuccessors())
437       O << LS << Succ->getName();
438     O << '\n';
439   }
440 }
441 
442 void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
443                          VPSlotTracker &SlotTracker) const {
444   O << Indent << getName() << ":\n";
445 
446   auto RecipeIndent = Indent + "  ";
447   for (const VPRecipeBase &Recipe : *this) {
448     Recipe.print(O, RecipeIndent, SlotTracker);
449     O << '\n';
450   }
451 
452   printSuccessors(O, Indent);
453 }
454 #endif
455 
456 void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
457   for (VPBlockBase *Block : depth_first(Entry))
458     // Drop all references in VPBasicBlocks and replace all uses with
459     // DummyValue.
460     Block->dropAllReferences(NewValue);
461 }
462 
463 void VPRegionBlock::execute(VPTransformState *State) {
464   ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);
465 
466   if (!isReplicator()) {
467     // Create and register the new vector loop.
468     Loop *PrevLoop = State->CurrentVectorLoop;
469     State->CurrentVectorLoop = State->LI->AllocateLoop();
470     BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
471     Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
472 
473     // Insert the new loop into the loop nest and register the new basic blocks
474     // before calling any utilities such as SCEV that require valid LoopInfo.
475     if (ParentLoop)
476       ParentLoop->addChildLoop(State->CurrentVectorLoop);
477     else
478       State->LI->addTopLevelLoop(State->CurrentVectorLoop);
479 
480     // Visit the VPBlocks connected to "this", starting from it.
481     for (VPBlockBase *Block : RPOT) {
482       LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
483       Block->execute(State);
484     }
485 
486     State->CurrentVectorLoop = PrevLoop;
487     return;
488   }
489 
490   assert(!State->Instance && "Replicating a Region with non-null instance.");
491 
492   // Enter replicating mode.
493   State->Instance = VPIteration(0, 0);
494 
495   for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
496     State->Instance->Part = Part;
497     assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
498     for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
499          ++Lane) {
500       State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
501       // Visit the VPBlocks connected to \p this, starting from it.
502       for (VPBlockBase *Block : RPOT) {
503         LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
504         Block->execute(State);
505       }
506     }
507   }
508 
509   // Exit replicating mode.
510   State->Instance.reset();
511 }
512 
513 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
514 void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
515                           VPSlotTracker &SlotTracker) const {
516   O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
517   auto NewIndent = Indent + "  ";
518   for (auto *BlockBase : depth_first(Entry)) {
519     O << '\n';
520     BlockBase->print(O, NewIndent, SlotTracker);
521   }
522   O << Indent << "}\n";
523 
524   printSuccessors(O, Indent);
525 }
526 #endif
527 
528 bool VPRecipeBase::mayWriteToMemory() const {
529   switch (getVPDefID()) {
530   case VPWidenMemoryInstructionSC: {
531     return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
532   }
533   case VPReplicateSC:
534   case VPWidenCallSC:
535     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
536         ->mayWriteToMemory();
537   case VPBranchOnMaskSC:
538     return false;
539   case VPWidenIntOrFpInductionSC:
540   case VPWidenCanonicalIVSC:
541   case VPWidenPHISC:
542   case VPBlendSC:
543   case VPWidenSC:
544   case VPWidenGEPSC:
545   case VPReductionSC:
546   case VPWidenSelectSC: {
547     const Instruction *I =
548         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
549     (void)I;
550     assert((!I || !I->mayWriteToMemory()) &&
551            "underlying instruction may write to memory");
552     return false;
553   }
554   default:
555     return true;
556   }
557 }
558 
559 bool VPRecipeBase::mayReadFromMemory() const {
560   switch (getVPDefID()) {
561   case VPWidenMemoryInstructionSC: {
562     return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
563   }
564   case VPReplicateSC:
565   case VPWidenCallSC:
566     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
567         ->mayReadFromMemory();
568   case VPBranchOnMaskSC:
569     return false;
570   case VPWidenIntOrFpInductionSC:
571   case VPWidenCanonicalIVSC:
572   case VPWidenPHISC:
573   case VPBlendSC:
574   case VPWidenSC:
575   case VPWidenGEPSC:
576   case VPReductionSC:
577   case VPWidenSelectSC: {
578     const Instruction *I =
579         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
580     (void)I;
581     assert((!I || !I->mayReadFromMemory()) &&
582            "underlying instruction may read from memory");
583     return false;
584   }
585   default:
586     return true;
587   }
588 }
589 
590 bool VPRecipeBase::mayHaveSideEffects() const {
591   switch (getVPDefID()) {
592   case VPBranchOnMaskSC:
593     return false;
594   case VPWidenIntOrFpInductionSC:
595   case VPWidenPointerInductionSC:
596   case VPWidenCanonicalIVSC:
597   case VPWidenPHISC:
598   case VPBlendSC:
599   case VPWidenSC:
600   case VPWidenGEPSC:
601   case VPReductionSC:
602   case VPWidenSelectSC:
603   case VPScalarIVStepsSC: {
604     const Instruction *I =
605         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
606     (void)I;
607     assert((!I || !I->mayHaveSideEffects()) &&
608            "underlying instruction has side-effects");
609     return false;
610   }
611   case VPReplicateSC: {
612     auto *R = cast<VPReplicateRecipe>(this);
613     return R->getUnderlyingInstr()->mayHaveSideEffects();
614   }
615   default:
616     return true;
617   }
618 }
619 
620 void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
621   auto Lane = VPLane::getLastLaneForVF(State.VF);
622   VPValue *ExitValue = getOperand(0);
623   if (Plan.isUniformAfterVectorization(ExitValue))
624     Lane = VPLane::getFirstLane();
625   Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
626                    State.Builder.GetInsertBlock());
627 }
628 
629 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
630   assert(!Parent && "Recipe already in some VPBasicBlock");
631   assert(InsertPos->getParent() &&
632          "Insertion position not in any VPBasicBlock");
633   Parent = InsertPos->getParent();
634   Parent->getRecipeList().insert(InsertPos->getIterator(), this);
635 }
636 
637 void VPRecipeBase::insertBefore(VPBasicBlock &BB,
638                                 iplist<VPRecipeBase>::iterator I) {
639   assert(!Parent && "Recipe already in some VPBasicBlock");
640   assert(I == BB.end() || I->getParent() == &BB);
641   Parent = &BB;
642   BB.getRecipeList().insert(I, this);
643 }
644 
645 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
646   assert(!Parent && "Recipe already in some VPBasicBlock");
647   assert(InsertPos->getParent() &&
648          "Insertion position not in any VPBasicBlock");
649   Parent = InsertPos->getParent();
650   Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
651 }
652 
653 void VPRecipeBase::removeFromParent() {
654   assert(getParent() && "Recipe not in any VPBasicBlock");
655   getParent()->getRecipeList().remove(getIterator());
656   Parent = nullptr;
657 }
658 
659 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
660   assert(getParent() && "Recipe not in any VPBasicBlock");
661   return getParent()->getRecipeList().erase(getIterator());
662 }
663 
664 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
665   removeFromParent();
666   insertAfter(InsertPos);
667 }
668 
669 void VPRecipeBase::moveBefore(VPBasicBlock &BB,
670                               iplist<VPRecipeBase>::iterator I) {
671   removeFromParent();
672   insertBefore(BB, I);
673 }
674 
675 void VPInstruction::generateInstruction(VPTransformState &State,
676                                         unsigned Part) {
677   IRBuilderBase &Builder = State.Builder;
678   Builder.SetCurrentDebugLocation(DL);
679 
680   if (Instruction::isBinaryOp(getOpcode())) {
681     Value *A = State.get(getOperand(0), Part);
682     Value *B = State.get(getOperand(1), Part);
683     Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
684     State.set(this, V, Part);
685     return;
686   }
687 
688   switch (getOpcode()) {
689   case VPInstruction::Not: {
690     Value *A = State.get(getOperand(0), Part);
691     Value *V = Builder.CreateNot(A);
692     State.set(this, V, Part);
693     break;
694   }
695   case VPInstruction::ICmpULE: {
696     Value *IV = State.get(getOperand(0), Part);
697     Value *TC = State.get(getOperand(1), Part);
698     Value *V = Builder.CreateICmpULE(IV, TC);
699     State.set(this, V, Part);
700     break;
701   }
702   case Instruction::Select: {
703     Value *Cond = State.get(getOperand(0), Part);
704     Value *Op1 = State.get(getOperand(1), Part);
705     Value *Op2 = State.get(getOperand(2), Part);
706     Value *V = Builder.CreateSelect(Cond, Op1, Op2);
707     State.set(this, V, Part);
708     break;
709   }
710   case VPInstruction::ActiveLaneMask: {
711     // Get first lane of vector induction variable.
712     Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
713     // Get the original loop tripcount.
714     Value *ScalarTC = State.get(getOperand(1), Part);
715 
716     auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
717     auto *PredTy = VectorType::get(Int1Ty, State.VF);
718     Instruction *Call = Builder.CreateIntrinsic(
719         Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
720         {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
721     State.set(this, Call, Part);
722     break;
723   }
724   case VPInstruction::FirstOrderRecurrenceSplice: {
725     // Generate code to combine the previous and current values in vector v3.
726     //
727     //   vector.ph:
728     //     v_init = vector(..., ..., ..., a[-1])
729     //     br vector.body
730     //
731     //   vector.body
732     //     i = phi [0, vector.ph], [i+4, vector.body]
733     //     v1 = phi [v_init, vector.ph], [v2, vector.body]
734     //     v2 = a[i, i+1, i+2, i+3];
735     //     v3 = vector(v1(3), v2(0, 1, 2))
736 
737     // For the first part, use the recurrence phi (v1), otherwise v2.
738     auto *V1 = State.get(getOperand(0), 0);
739     Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
740     if (!PartMinus1->getType()->isVectorTy()) {
741       State.set(this, PartMinus1, Part);
742     } else {
743       Value *V2 = State.get(getOperand(1), Part);
744       State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
745     }
746     break;
747   }
748 
749   case VPInstruction::CanonicalIVIncrement:
750   case VPInstruction::CanonicalIVIncrementNUW: {
751     Value *Next = nullptr;
752     if (Part == 0) {
753       bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
754       auto *Phi = State.get(getOperand(0), 0);
755       // The loop step is equal to the vectorization factor (num of SIMD
756       // elements) times the unroll factor (num of SIMD instructions).
757       Value *Step =
758           createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
759       Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
760     } else {
761       Next = State.get(this, 0);
762     }
763 
764     State.set(this, Next, Part);
765     break;
766   }
767   case VPInstruction::BranchOnCond: {
768     if (Part != 0)
769       break;
770 
771     Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
772     VPRegionBlock *ParentRegion = getParent()->getParent();
773     VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
774 
775     // Replace the temporary unreachable terminator with a new conditional
776     // branch, hooking it up to backward destination for exiting blocks now and
777     // to forward destination(s) later when they are created.
778     BranchInst *CondBr =
779         Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
780 
781     if (getParent()->isExiting())
782       CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
783 
784     CondBr->setSuccessor(0, nullptr);
785     Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
786     break;
787   }
788   case VPInstruction::BranchOnCount: {
789     if (Part != 0)
790       break;
791     // First create the compare.
792     Value *IV = State.get(getOperand(0), Part);
793     Value *TC = State.get(getOperand(1), Part);
794     Value *Cond = Builder.CreateICmpEQ(IV, TC);
795 
796     // Now create the branch.
797     auto *Plan = getParent()->getPlan();
798     VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
799     VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
800 
801     // Replace the temporary unreachable terminator with a new conditional
802     // branch, hooking it up to backward destination (the header) now and to the
803     // forward destination (the exit/middle block) later when it is created.
804     // Note that CreateCondBr expects a valid BB as first argument, so we need
805     // to set it to nullptr later.
806     BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
807                                               State.CFG.VPBB2IRBB[Header]);
808     CondBr->setSuccessor(0, nullptr);
809     Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
810     break;
811   }
812   default:
813     llvm_unreachable("Unsupported opcode for instruction");
814   }
815 }
816 
817 void VPInstruction::execute(VPTransformState &State) {
818   assert(!State.Instance && "VPInstruction executing an Instance");
819   IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
820   State.Builder.setFastMathFlags(FMF);
821   for (unsigned Part = 0; Part < State.UF; ++Part)
822     generateInstruction(State, Part);
823 }
824 
825 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
826 void VPInstruction::dump() const {
827   VPSlotTracker SlotTracker(getParent()->getPlan());
828   print(dbgs(), "", SlotTracker);
829 }
830 
831 void VPInstruction::print(raw_ostream &O, const Twine &Indent,
832                           VPSlotTracker &SlotTracker) const {
833   O << Indent << "EMIT ";
834 
835   if (hasResult()) {
836     printAsOperand(O, SlotTracker);
837     O << " = ";
838   }
839 
840   switch (getOpcode()) {
841   case VPInstruction::Not:
842     O << "not";
843     break;
844   case VPInstruction::ICmpULE:
845     O << "icmp ule";
846     break;
847   case VPInstruction::SLPLoad:
848     O << "combined load";
849     break;
850   case VPInstruction::SLPStore:
851     O << "combined store";
852     break;
853   case VPInstruction::ActiveLaneMask:
854     O << "active lane mask";
855     break;
856   case VPInstruction::FirstOrderRecurrenceSplice:
857     O << "first-order splice";
858     break;
859   case VPInstruction::CanonicalIVIncrement:
860     O << "VF * UF + ";
861     break;
862   case VPInstruction::CanonicalIVIncrementNUW:
863     O << "VF * UF +(nuw) ";
864     break;
865   case VPInstruction::BranchOnCond:
866     O << "branch-on-cond";
867     break;
868   case VPInstruction::BranchOnCount:
869     O << "branch-on-count ";
870     break;
871   default:
872     O << Instruction::getOpcodeName(getOpcode());
873   }
874 
875   O << FMF;
876 
877   for (const VPValue *Operand : operands()) {
878     O << " ";
879     Operand->printAsOperand(O, SlotTracker);
880   }
881 
882   if (DL) {
883     O << ", !dbg ";
884     DL.print(O);
885   }
886 }
887 #endif
888 
889 void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
890   // Make sure the VPInstruction is a floating-point operation.
891   assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
892           Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
893           Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
894           Opcode == Instruction::FCmp) &&
895          "this op can't take fast-math flags");
896   FMF = FMFNew;
897 }
898 
899 void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
900                              Value *CanonicalIVStartValue,
901                              VPTransformState &State) {
902 
903   VPBasicBlock *ExitingVPBB = getVectorLoopRegion()->getExitingBasicBlock();
904   auto *Term = dyn_cast<VPInstruction>(&ExitingVPBB->back());
905   // Try to simplify BranchOnCount to 'BranchOnCond true' if TC <= VF * UF when
906   // preparing to execute the plan for the main vector loop.
907   if (!CanonicalIVStartValue && Term &&
908       Term->getOpcode() == VPInstruction::BranchOnCount &&
909       isa<ConstantInt>(TripCountV)) {
910     ConstantInt *C = cast<ConstantInt>(TripCountV);
911     uint64_t TCVal = C->getZExtValue();
912     if (TCVal && TCVal <= State.VF.getKnownMinValue() * State.UF) {
913       auto *BOC =
914           new VPInstruction(VPInstruction::BranchOnCond,
915                             {getOrAddExternalDef(State.Builder.getTrue())});
916       Term->eraseFromParent();
917       ExitingVPBB->appendRecipe(BOC);
918       // TODO: Further simplifications are possible
919       //      1. Replace inductions with constants.
920       //      2. Replace vector loop region with VPBasicBlock.
921     }
922   }
923 
924   // Check if the trip count is needed, and if so build it.
925   if (TripCount && TripCount->getNumUsers()) {
926     for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
927       State.set(TripCount, TripCountV, Part);
928   }
929 
930   // Check if the backedge taken count is needed, and if so build it.
931   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
932     IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
933     auto *TCMO = Builder.CreateSub(TripCountV,
934                                    ConstantInt::get(TripCountV->getType(), 1),
935                                    "trip.count.minus.1");
936     auto VF = State.VF;
937     Value *VTCMO =
938         VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
939     for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
940       State.set(BackedgeTakenCount, VTCMO, Part);
941   }
942 
943   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
944     State.set(&VectorTripCount, VectorTripCountV, Part);
945 
946   // When vectorizing the epilogue loop, the canonical induction start value
947   // needs to be changed from zero to the value after the main vector loop.
948   if (CanonicalIVStartValue) {
949     VPValue *VPV = getOrAddExternalDef(CanonicalIVStartValue);
950     auto *IV = getCanonicalIV();
951     assert(all_of(IV->users(),
952                   [](const VPUser *U) {
953                     if (isa<VPScalarIVStepsRecipe>(U))
954                       return true;
955                     auto *VPI = cast<VPInstruction>(U);
956                     return VPI->getOpcode() ==
957                                VPInstruction::CanonicalIVIncrement ||
958                            VPI->getOpcode() ==
959                                VPInstruction::CanonicalIVIncrementNUW;
960                   }) &&
961            "the canonical IV should only be used by its increments or "
962            "ScalarIVSteps when "
963            "resetting the start value");
964     IV->setOperand(0, VPV);
965   }
966 }
967 
968 /// Generate the code inside the preheader and body of the vectorized loop.
969 /// Assumes a single pre-header basic-block was created for this. Introduce
970 /// additional basic-blocks as needed, and fill them all.
971 void VPlan::execute(VPTransformState *State) {
972   // Set the reverse mapping from VPValues to Values for code generation.
973   for (auto &Entry : Value2VPValue)
974     State->VPValue2Value[Entry.second] = Entry.first;
975 
976   // Initialize CFG state.
977   State->CFG.PrevVPBB = nullptr;
978   State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
979   BasicBlock *VectorPreHeader = State->CFG.PrevBB;
980   State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
981 
982   // Generate code in the loop pre-header and body.
983   for (VPBlockBase *Block : depth_first(Entry))
984     Block->execute(State);
985 
986   VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
987   BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
988 
989   // Fix the latch value of canonical, reduction and first-order recurrences
990   // phis in the vector loop.
991   VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
992   for (VPRecipeBase &R : Header->phis()) {
993     // Skip phi-like recipes that generate their backedege values themselves.
994     if (isa<VPWidenPHIRecipe>(&R))
995       continue;
996 
997     if (isa<VPWidenPointerInductionRecipe>(&R) ||
998         isa<VPWidenIntOrFpInductionRecipe>(&R)) {
999       PHINode *Phi = nullptr;
1000       if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1001         Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
1002       } else {
1003         auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1004         // TODO: Split off the case that all users of a pointer phi are scalar
1005         // from the VPWidenPointerInductionRecipe.
1006         if (WidenPhi->onlyScalarsGenerated(State->VF))
1007           continue;
1008 
1009         auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
1010         Phi = cast<PHINode>(GEP->getPointerOperand());
1011       }
1012 
1013       Phi->setIncomingBlock(1, VectorLatchBB);
1014 
1015       // Move the last step to the end of the latch block. This ensures
1016       // consistent placement of all induction updates.
1017       Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1018       Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
1019       continue;
1020     }
1021 
1022     auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1023     // For  canonical IV, first-order recurrences and in-order reduction phis,
1024     // only a single part is generated, which provides the last part from the
1025     // previous iteration. For non-ordered reductions all UF parts are
1026     // generated.
1027     bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
1028                             isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
1029                             cast<VPReductionPHIRecipe>(PhiR)->isOrdered();
1030     unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
1031 
1032     for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1033       Value *Phi = State->get(PhiR, Part);
1034       Value *Val = State->get(PhiR->getBackedgeValue(),
1035                               SinglePartNeeded ? State->UF - 1 : Part);
1036       cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1037     }
1038   }
1039 
1040   // We do not attempt to preserve DT for outer loop vectorization currently.
1041   if (!EnableVPlanNativePath) {
1042     BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header];
1043     State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader);
1044     updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB,
1045                         State->CFG.ExitBB);
1046   }
1047 }
1048 
1049 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1050 LLVM_DUMP_METHOD
1051 void VPlan::print(raw_ostream &O) const {
1052   VPSlotTracker SlotTracker(this);
1053 
1054   O << "VPlan '" << Name << "' {";
1055 
1056   if (VectorTripCount.getNumUsers() > 0) {
1057     O << "\nLive-in ";
1058     VectorTripCount.printAsOperand(O, SlotTracker);
1059     O << " = vector-trip-count\n";
1060   }
1061 
1062   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1063     O << "\nLive-in ";
1064     BackedgeTakenCount->printAsOperand(O, SlotTracker);
1065     O << " = backedge-taken count\n";
1066   }
1067 
1068   for (const VPBlockBase *Block : depth_first(getEntry())) {
1069     O << '\n';
1070     Block->print(O, "", SlotTracker);
1071   }
1072 
1073   if (!LiveOuts.empty())
1074     O << "\n";
1075   for (auto &KV : LiveOuts) {
1076     O << "Live-out ";
1077     KV.second->getPhi()->printAsOperand(O);
1078     O << " = ";
1079     KV.second->getOperand(0)->printAsOperand(O, SlotTracker);
1080     O << "\n";
1081   }
1082 
1083   O << "}\n";
1084 }
1085 
1086 LLVM_DUMP_METHOD
1087 void VPlan::printDOT(raw_ostream &O) const {
1088   VPlanPrinter Printer(O, *this);
1089   Printer.dump();
1090 }
1091 
1092 LLVM_DUMP_METHOD
1093 void VPlan::dump() const { print(dbgs()); }
1094 #endif
1095 
1096 void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
1097   assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
1098   LiveOuts.insert({PN, new VPLiveOut(PN, V)});
1099 }
1100 
1101 void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
1102                                 BasicBlock *LoopLatchBB,
1103                                 BasicBlock *LoopExitBB) {
1104   // The vector body may be more than a single basic-block by this point.
1105   // Update the dominator tree information inside the vector body by propagating
1106   // it from header to latch, expecting only triangular control-flow, if any.
1107   BasicBlock *PostDomSucc = nullptr;
1108   for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
1109     // Get the list of successors of this block.
1110     std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
1111     assert(Succs.size() <= 2 &&
1112            "Basic block in vector loop has more than 2 successors.");
1113     PostDomSucc = Succs[0];
1114     if (Succs.size() == 1) {
1115       assert(PostDomSucc->getSinglePredecessor() &&
1116              "PostDom successor has more than one predecessor.");
1117       DT->addNewBlock(PostDomSucc, BB);
1118       continue;
1119     }
1120     BasicBlock *InterimSucc = Succs[1];
1121     if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
1122       PostDomSucc = Succs[1];
1123       InterimSucc = Succs[0];
1124     }
1125     assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
1126            "One successor of a basic block does not lead to the other.");
1127     assert(InterimSucc->getSinglePredecessor() &&
1128            "Interim successor has more than one predecessor.");
1129     assert(PostDomSucc->hasNPredecessors(2) &&
1130            "PostDom successor has more than two predecessors.");
1131     DT->addNewBlock(InterimSucc, BB);
1132     DT->addNewBlock(PostDomSucc, BB);
1133   }
1134   // Latch block is a new dominator for the loop exit.
1135   DT->changeImmediateDominator(LoopExitBB, LoopLatchBB);
1136   assert(DT->verify(DominatorTree::VerificationLevel::Fast));
1137 }
1138 
1139 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1140 Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
1141   return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
1142          Twine(getOrCreateBID(Block));
1143 }
1144 
1145 Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
1146   const std::string &Name = Block->getName();
1147   if (!Name.empty())
1148     return Name;
1149   return "VPB" + Twine(getOrCreateBID(Block));
1150 }
1151 
1152 void VPlanPrinter::dump() {
1153   Depth = 1;
1154   bumpIndent(0);
1155   OS << "digraph VPlan {\n";
1156   OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1157   if (!Plan.getName().empty())
1158     OS << "\\n" << DOT::EscapeString(Plan.getName());
1159   if (Plan.BackedgeTakenCount) {
1160     OS << ", where:\\n";
1161     Plan.BackedgeTakenCount->print(OS, SlotTracker);
1162     OS << " := BackedgeTakenCount";
1163   }
1164   OS << "\"]\n";
1165   OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1166   OS << "edge [fontname=Courier, fontsize=30]\n";
1167   OS << "compound=true\n";
1168 
1169   for (const VPBlockBase *Block : depth_first(Plan.getEntry()))
1170     dumpBlock(Block);
1171 
1172   OS << "}\n";
1173 }
1174 
1175 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
1176   if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
1177     dumpBasicBlock(BasicBlock);
1178   else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1179     dumpRegion(Region);
1180   else
1181     llvm_unreachable("Unsupported kind of VPBlock.");
1182 }
1183 
1184 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
1185                             bool Hidden, const Twine &Label) {
1186   // Due to "dot" we print an edge between two regions as an edge between the
1187   // exiting basic block and the entry basic of the respective regions.
1188   const VPBlockBase *Tail = From->getExitingBasicBlock();
1189   const VPBlockBase *Head = To->getEntryBasicBlock();
1190   OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1191   OS << " [ label=\"" << Label << '\"';
1192   if (Tail != From)
1193     OS << " ltail=" << getUID(From);
1194   if (Head != To)
1195     OS << " lhead=" << getUID(To);
1196   if (Hidden)
1197     OS << "; splines=none";
1198   OS << "]\n";
1199 }
1200 
1201 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
1202   auto &Successors = Block->getSuccessors();
1203   if (Successors.size() == 1)
1204     drawEdge(Block, Successors.front(), false, "");
1205   else if (Successors.size() == 2) {
1206     drawEdge(Block, Successors.front(), false, "T");
1207     drawEdge(Block, Successors.back(), false, "F");
1208   } else {
1209     unsigned SuccessorNumber = 0;
1210     for (auto *Successor : Successors)
1211       drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
1212   }
1213 }
1214 
1215 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
1216   // Implement dot-formatted dump by performing plain-text dump into the
1217   // temporary storage followed by some post-processing.
1218   OS << Indent << getUID(BasicBlock) << " [label =\n";
1219   bumpIndent(1);
1220   std::string Str;
1221   raw_string_ostream SS(Str);
1222   // Use no indentation as we need to wrap the lines into quotes ourselves.
1223   BasicBlock->print(SS, "", SlotTracker);
1224 
1225   // We need to process each line of the output separately, so split
1226   // single-string plain-text dump.
1227   SmallVector<StringRef, 0> Lines;
1228   StringRef(Str).rtrim('\n').split(Lines, "\n");
1229 
1230   auto EmitLine = [&](StringRef Line, StringRef Suffix) {
1231     OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
1232   };
1233 
1234   // Don't need the "+" after the last line.
1235   for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
1236     EmitLine(Line, " +\n");
1237   EmitLine(Lines.back(), "\n");
1238 
1239   bumpIndent(-1);
1240   OS << Indent << "]\n";
1241 
1242   dumpEdges(BasicBlock);
1243 }
1244 
1245 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
1246   OS << Indent << "subgraph " << getUID(Region) << " {\n";
1247   bumpIndent(1);
1248   OS << Indent << "fontname=Courier\n"
1249      << Indent << "label=\""
1250      << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
1251      << DOT::EscapeString(Region->getName()) << "\"\n";
1252   // Dump the blocks of the region.
1253   assert(Region->getEntry() && "Region contains no inner blocks.");
1254   for (const VPBlockBase *Block : depth_first(Region->getEntry()))
1255     dumpBlock(Block);
1256   bumpIndent(-1);
1257   OS << Indent << "}\n";
1258   dumpEdges(Region);
1259 }
1260 
1261 void VPlanIngredient::print(raw_ostream &O) const {
1262   if (auto *Inst = dyn_cast<Instruction>(V)) {
1263     if (!Inst->getType()->isVoidTy()) {
1264       Inst->printAsOperand(O, false);
1265       O << " = ";
1266     }
1267     O << Inst->getOpcodeName() << " ";
1268     unsigned E = Inst->getNumOperands();
1269     if (E > 0) {
1270       Inst->getOperand(0)->printAsOperand(O, false);
1271       for (unsigned I = 1; I < E; ++I)
1272         Inst->getOperand(I)->printAsOperand(O << ", ", false);
1273     }
1274   } else // !Inst
1275     V->printAsOperand(O, false);
1276 }
1277 
1278 void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
1279                               VPSlotTracker &SlotTracker) const {
1280   O << Indent << "WIDEN-CALL ";
1281 
1282   auto *CI = cast<CallInst>(getUnderlyingInstr());
1283   if (CI->getType()->isVoidTy())
1284     O << "void ";
1285   else {
1286     printAsOperand(O, SlotTracker);
1287     O << " = ";
1288   }
1289 
1290   O << "call @" << CI->getCalledFunction()->getName() << "(";
1291   printOperands(O, SlotTracker);
1292   O << ")";
1293 }
1294 
1295 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
1296                                 VPSlotTracker &SlotTracker) const {
1297   O << Indent << "WIDEN-SELECT ";
1298   printAsOperand(O, SlotTracker);
1299   O << " = select ";
1300   getOperand(0)->printAsOperand(O, SlotTracker);
1301   O << ", ";
1302   getOperand(1)->printAsOperand(O, SlotTracker);
1303   O << ", ";
1304   getOperand(2)->printAsOperand(O, SlotTracker);
1305   O << (InvariantCond ? " (condition is loop invariant)" : "");
1306 }
1307 
1308 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
1309                           VPSlotTracker &SlotTracker) const {
1310   O << Indent << "WIDEN ";
1311   printAsOperand(O, SlotTracker);
1312   O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
1313   printOperands(O, SlotTracker);
1314 }
1315 
1316 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
1317                                           VPSlotTracker &SlotTracker) const {
1318   O << Indent << "WIDEN-INDUCTION";
1319   if (getTruncInst()) {
1320     O << "\\l\"";
1321     O << " +\n" << Indent << "\"  " << VPlanIngredient(IV) << "\\l\"";
1322     O << " +\n" << Indent << "\"  ";
1323     getVPValue(0)->printAsOperand(O, SlotTracker);
1324   } else
1325     O << " " << VPlanIngredient(IV);
1326 
1327   O << ", ";
1328   getStepValue()->printAsOperand(O, SlotTracker);
1329 }
1330 
1331 void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
1332                                           VPSlotTracker &SlotTracker) const {
1333   O << Indent << "EMIT ";
1334   printAsOperand(O, SlotTracker);
1335   O << " = WIDEN-POINTER-INDUCTION ";
1336   getStartValue()->printAsOperand(O, SlotTracker);
1337   O << ", " << *IndDesc.getStep();
1338 }
1339 
1340 #endif
1341 
1342 bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
1343   auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1344   auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
1345   return StartC && StartC->isZero() && StepC && StepC->isOne();
1346 }
1347 
1348 VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
1349   return cast<VPCanonicalIVPHIRecipe>(getOperand(0));
1350 }
1351 
1352 bool VPScalarIVStepsRecipe::isCanonical() const {
1353   auto *CanIV = getCanonicalIV();
1354   // The start value of the steps-recipe must match the start value of the
1355   // canonical induction and it must step by 1.
1356   if (CanIV->getStartValue() != getStartValue())
1357     return false;
1358   auto *StepVPV = getStepValue();
1359   if (StepVPV->getDef())
1360     return false;
1361   auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue());
1362   return StepC && StepC->isOne();
1363 }
1364 
1365 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1366 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
1367                                   VPSlotTracker &SlotTracker) const {
1368   O << Indent;
1369   printAsOperand(O, SlotTracker);
1370   O << Indent << "= SCALAR-STEPS ";
1371   printOperands(O, SlotTracker);
1372 }
1373 
1374 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
1375                              VPSlotTracker &SlotTracker) const {
1376   O << Indent << "WIDEN-GEP ";
1377   O << (IsPtrLoopInvariant ? "Inv" : "Var");
1378   size_t IndicesNumber = IsIndexLoopInvariant.size();
1379   for (size_t I = 0; I < IndicesNumber; ++I)
1380     O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
1381 
1382   O << " ";
1383   printAsOperand(O, SlotTracker);
1384   O << " = getelementptr ";
1385   printOperands(O, SlotTracker);
1386 }
1387 
1388 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1389                              VPSlotTracker &SlotTracker) const {
1390   O << Indent << "WIDEN-PHI ";
1391 
1392   auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
1393   // Unless all incoming values are modeled in VPlan  print the original PHI
1394   // directly.
1395   // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
1396   // values as VPValues.
1397   if (getNumOperands() != OriginalPhi->getNumOperands()) {
1398     O << VPlanIngredient(OriginalPhi);
1399     return;
1400   }
1401 
1402   printAsOperand(O, SlotTracker);
1403   O << " = phi ";
1404   printOperands(O, SlotTracker);
1405 }
1406 
1407 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
1408                           VPSlotTracker &SlotTracker) const {
1409   O << Indent << "BLEND ";
1410   Phi->printAsOperand(O, false);
1411   O << " =";
1412   if (getNumIncomingValues() == 1) {
1413     // Not a User of any mask: not really blending, this is a
1414     // single-predecessor phi.
1415     O << " ";
1416     getIncomingValue(0)->printAsOperand(O, SlotTracker);
1417   } else {
1418     for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
1419       O << " ";
1420       getIncomingValue(I)->printAsOperand(O, SlotTracker);
1421       O << "/";
1422       getMask(I)->printAsOperand(O, SlotTracker);
1423     }
1424   }
1425 }
1426 
1427 void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
1428                               VPSlotTracker &SlotTracker) const {
1429   O << Indent << "REDUCE ";
1430   printAsOperand(O, SlotTracker);
1431   O << " = ";
1432   getChainOp()->printAsOperand(O, SlotTracker);
1433   O << " +";
1434   if (isa<FPMathOperator>(getUnderlyingInstr()))
1435     O << getUnderlyingInstr()->getFastMathFlags();
1436   O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
1437   getVecOp()->printAsOperand(O, SlotTracker);
1438   if (getCondOp()) {
1439     O << ", ";
1440     getCondOp()->printAsOperand(O, SlotTracker);
1441   }
1442   O << ")";
1443   if (RdxDesc->IntermediateStore)
1444     O << " (with final reduction value stored in invariant address sank "
1445          "outside of loop)";
1446 }
1447 
1448 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
1449                               VPSlotTracker &SlotTracker) const {
1450   O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
1451 
1452   if (!getUnderlyingInstr()->getType()->isVoidTy()) {
1453     printAsOperand(O, SlotTracker);
1454     O << " = ";
1455   }
1456   if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
1457     O << "call @" << CB->getCalledFunction()->getName() << "(";
1458     interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
1459                     O, [&O, &SlotTracker](VPValue *Op) {
1460                       Op->printAsOperand(O, SlotTracker);
1461                     });
1462     O << ")";
1463   } else {
1464     O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
1465     printOperands(O, SlotTracker);
1466   }
1467 
1468   if (AlsoPack)
1469     O << " (S->V)";
1470 }
1471 
1472 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1473                                 VPSlotTracker &SlotTracker) const {
1474   O << Indent << "PHI-PREDICATED-INSTRUCTION ";
1475   printAsOperand(O, SlotTracker);
1476   O << " = ";
1477   printOperands(O, SlotTracker);
1478 }
1479 
1480 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
1481                                            VPSlotTracker &SlotTracker) const {
1482   O << Indent << "WIDEN ";
1483 
1484   if (!isStore()) {
1485     getVPSingleValue()->printAsOperand(O, SlotTracker);
1486     O << " = ";
1487   }
1488   O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
1489 
1490   printOperands(O, SlotTracker);
1491 }
1492 #endif
1493 
1494 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
1495   Value *Start = getStartValue()->getLiveInIRValue();
1496   PHINode *EntryPart = PHINode::Create(
1497       Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
1498 
1499   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1500   EntryPart->addIncoming(Start, VectorPH);
1501   EntryPart->setDebugLoc(DL);
1502   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1503     State.set(this, EntryPart, Part);
1504 }
1505 
1506 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1507 void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1508                                    VPSlotTracker &SlotTracker) const {
1509   O << Indent << "EMIT ";
1510   printAsOperand(O, SlotTracker);
1511   O << " = CANONICAL-INDUCTION";
1512 }
1513 #endif
1514 
1515 bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
1516   bool IsUniform = vputils::onlyFirstLaneUsed(this);
1517   return all_of(users(),
1518                 [&](const VPUser *U) { return U->usesScalars(this); }) &&
1519          (IsUniform || !VF.isScalable());
1520 }
1521 
1522 void VPExpandSCEVRecipe::execute(VPTransformState &State) {
1523   assert(!State.Instance && "cannot be used in per-lane");
1524   const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
1525   SCEVExpander Exp(SE, DL, "induction");
1526 
1527   Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
1528                                  &*State.Builder.GetInsertPoint());
1529 
1530   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1531     State.set(this, Res, Part);
1532 }
1533 
1534 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1535 void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
1536                                VPSlotTracker &SlotTracker) const {
1537   O << Indent << "EMIT ";
1538   getVPSingleValue()->printAsOperand(O, SlotTracker);
1539   O << " = EXPAND SCEV " << *Expr;
1540 }
1541 #endif
1542 
1543 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
1544   Value *CanonicalIV = State.get(getOperand(0), 0);
1545   Type *STy = CanonicalIV->getType();
1546   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
1547   ElementCount VF = State.VF;
1548   Value *VStart = VF.isScalar()
1549                       ? CanonicalIV
1550                       : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
1551   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1552     Value *VStep = createStepForVF(Builder, STy, VF, Part);
1553     if (VF.isVector()) {
1554       VStep = Builder.CreateVectorSplat(VF, VStep);
1555       VStep = Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
1556     }
1557     Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
1558     State.set(this, CanonicalVectorIV, Part);
1559   }
1560 }
1561 
1562 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1563 void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
1564                                      VPSlotTracker &SlotTracker) const {
1565   O << Indent << "EMIT ";
1566   printAsOperand(O, SlotTracker);
1567   O << " = WIDEN-CANONICAL-INDUCTION ";
1568   printOperands(O, SlotTracker);
1569 }
1570 #endif
1571 
1572 void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
1573   auto &Builder = State.Builder;
1574   // Create a vector from the initial value.
1575   auto *VectorInit = getStartValue()->getLiveInIRValue();
1576 
1577   Type *VecTy = State.VF.isScalar()
1578                     ? VectorInit->getType()
1579                     : VectorType::get(VectorInit->getType(), State.VF);
1580 
1581   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1582   if (State.VF.isVector()) {
1583     auto *IdxTy = Builder.getInt32Ty();
1584     auto *One = ConstantInt::get(IdxTy, 1);
1585     IRBuilder<>::InsertPointGuard Guard(Builder);
1586     Builder.SetInsertPoint(VectorPH->getTerminator());
1587     auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
1588     auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
1589     VectorInit = Builder.CreateInsertElement(
1590         PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
1591   }
1592 
1593   // Create a phi node for the new recurrence.
1594   PHINode *EntryPart = PHINode::Create(
1595       VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
1596   EntryPart->addIncoming(VectorInit, VectorPH);
1597   State.set(this, EntryPart, 0);
1598 }
1599 
1600 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1601 void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
1602                                             VPSlotTracker &SlotTracker) const {
1603   O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
1604   printAsOperand(O, SlotTracker);
1605   O << " = phi ";
1606   printOperands(O, SlotTracker);
1607 }
1608 #endif
1609 
1610 void VPReductionPHIRecipe::execute(VPTransformState &State) {
1611   PHINode *PN = cast<PHINode>(getUnderlyingValue());
1612   auto &Builder = State.Builder;
1613 
1614   // In order to support recurrences we need to be able to vectorize Phi nodes.
1615   // Phi nodes have cycles, so we need to vectorize them in two stages. This is
1616   // stage #1: We create a new vector PHI node with no incoming edges. We'll use
1617   // this value when we vectorize all of the instructions that use the PHI.
1618   bool ScalarPHI = State.VF.isScalar() || IsInLoop;
1619   Type *VecTy =
1620       ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);
1621 
1622   BasicBlock *HeaderBB = State.CFG.PrevBB;
1623   assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
1624          "recipe must be in the vector loop header");
1625   unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
1626   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1627     Value *EntryPart =
1628         PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
1629     State.set(this, EntryPart, Part);
1630   }
1631 
1632   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1633 
1634   // Reductions do not have to start at zero. They can start with
1635   // any loop invariant values.
1636   VPValue *StartVPV = getStartValue();
1637   Value *StartV = StartVPV->getLiveInIRValue();
1638 
1639   Value *Iden = nullptr;
1640   RecurKind RK = RdxDesc.getRecurrenceKind();
1641   if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
1642       RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
1643     // MinMax reduction have the start value as their identify.
1644     if (ScalarPHI) {
1645       Iden = StartV;
1646     } else {
1647       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
1648       Builder.SetInsertPoint(VectorPH->getTerminator());
1649       StartV = Iden =
1650           Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
1651     }
1652   } else {
1653     Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
1654                                          RdxDesc.getFastMathFlags());
1655 
1656     if (!ScalarPHI) {
1657       Iden = Builder.CreateVectorSplat(State.VF, Iden);
1658       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
1659       Builder.SetInsertPoint(VectorPH->getTerminator());
1660       Constant *Zero = Builder.getInt32(0);
1661       StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
1662     }
1663   }
1664 
1665   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1666     Value *EntryPart = State.get(this, Part);
1667     // Make sure to add the reduction start value only to the
1668     // first unroll part.
1669     Value *StartVal = (Part == 0) ? StartV : Iden;
1670     cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
1671   }
1672 }
1673 
1674 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1675 void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1676                                  VPSlotTracker &SlotTracker) const {
1677   O << Indent << "WIDEN-REDUCTION-PHI ";
1678 
1679   printAsOperand(O, SlotTracker);
1680   O << " = phi ";
1681   printOperands(O, SlotTracker);
1682 }
1683 #endif
1684 
1685 template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
1686 
1687 void VPValue::replaceAllUsesWith(VPValue *New) {
1688   for (unsigned J = 0; J < getNumUsers();) {
1689     VPUser *User = Users[J];
1690     unsigned NumUsers = getNumUsers();
1691     for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
1692       if (User->getOperand(I) == this)
1693         User->setOperand(I, New);
1694     // If a user got removed after updating the current user, the next user to
1695     // update will be moved to the current position, so we only need to
1696     // increment the index if the number of users did not change.
1697     if (NumUsers == getNumUsers())
1698       J++;
1699   }
1700 }
1701 
1702 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1703 void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
1704   if (const Value *UV = getUnderlyingValue()) {
1705     OS << "ir<";
1706     UV->printAsOperand(OS, false);
1707     OS << ">";
1708     return;
1709   }
1710 
1711   unsigned Slot = Tracker.getSlot(this);
1712   if (Slot == unsigned(-1))
1713     OS << "<badref>";
1714   else
1715     OS << "vp<%" << Tracker.getSlot(this) << ">";
1716 }
1717 
1718 void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
1719   interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1720     Op->printAsOperand(O, SlotTracker);
1721   });
1722 }
1723 #endif
1724 
1725 void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
1726                                           Old2NewTy &Old2New,
1727                                           InterleavedAccessInfo &IAI) {
1728   ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
1729   for (VPBlockBase *Base : RPOT) {
1730     visitBlock(Base, Old2New, IAI);
1731   }
1732 }
1733 
1734 void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1735                                          InterleavedAccessInfo &IAI) {
1736   if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
1737     for (VPRecipeBase &VPI : *VPBB) {
1738       if (isa<VPHeaderPHIRecipe>(&VPI))
1739         continue;
1740       assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
1741       auto *VPInst = cast<VPInstruction>(&VPI);
1742       auto *Inst = cast<Instruction>(VPInst->getUnderlyingValue());
1743       auto *IG = IAI.getInterleaveGroup(Inst);
1744       if (!IG)
1745         continue;
1746 
1747       auto NewIGIter = Old2New.find(IG);
1748       if (NewIGIter == Old2New.end())
1749         Old2New[IG] = new InterleaveGroup<VPInstruction>(
1750             IG->getFactor(), IG->isReverse(), IG->getAlign());
1751 
1752       if (Inst == IG->getInsertPos())
1753         Old2New[IG]->setInsertPos(VPInst);
1754 
1755       InterleaveGroupMap[VPInst] = Old2New[IG];
1756       InterleaveGroupMap[VPInst]->insertMember(
1757           VPInst, IG->getIndex(Inst),
1758           Align(IG->isReverse() ? (-1) * int(IG->getFactor())
1759                                 : IG->getFactor()));
1760     }
1761   } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1762     visitRegion(Region, Old2New, IAI);
1763   else
1764     llvm_unreachable("Unsupported kind of VPBlock.");
1765 }
1766 
1767 VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
1768                                                  InterleavedAccessInfo &IAI) {
1769   Old2NewTy Old2New;
1770   visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
1771 }
1772 
1773 void VPSlotTracker::assignSlot(const VPValue *V) {
1774   assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!");
1775   Slots[V] = NextSlot++;
1776 }
1777 
1778 void VPSlotTracker::assignSlots(const VPlan &Plan) {
1779 
1780   for (const auto &P : Plan.VPExternalDefs)
1781     assignSlot(P.second);
1782 
1783   assignSlot(&Plan.VectorTripCount);
1784   if (Plan.BackedgeTakenCount)
1785     assignSlot(Plan.BackedgeTakenCount);
1786 
1787   ReversePostOrderTraversal<
1788       VPBlockRecursiveTraversalWrapper<const VPBlockBase *>>
1789       RPOT(VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(
1790           Plan.getEntry()));
1791   for (const VPBasicBlock *VPBB :
1792        VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
1793     for (const VPRecipeBase &Recipe : *VPBB)
1794       for (VPValue *Def : Recipe.definedValues())
1795         assignSlot(Def);
1796 }
1797 
1798 bool vputils::onlyFirstLaneUsed(VPValue *Def) {
1799   return all_of(Def->users(),
1800                 [Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); });
1801 }
1802 
1803 VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
1804                                                 ScalarEvolution &SE) {
1805   if (auto *E = dyn_cast<SCEVConstant>(Expr))
1806     return Plan.getOrAddExternalDef(E->getValue());
1807   if (auto *E = dyn_cast<SCEVUnknown>(Expr))
1808     return Plan.getOrAddExternalDef(E->getValue());
1809 
1810   VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock();
1811   VPValue *Step = new VPExpandSCEVRecipe(Expr, SE);
1812   Preheader->appendRecipe(cast<VPRecipeBase>(Step->getDef()));
1813   return Step;
1814 }
1815