//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementations for different VPlan recipes.
///
//===----------------------------------------------------------------------===//

#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>

using namespace llvm;

using VectorParts = SmallVector<Value *, 2>;

namespace llvm {
extern cl::opt<bool> EnableVPlanNativePath;
}

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPInterleaveSC:
    return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return true;
  case VPReplicateSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPWidenCallSC:
    return !cast<VPWidenCallRecipe>(this)
                ->getCalledScalarFunction()
                ->onlyReadsMemory();
  case VPBranchOnMaskSC:
  case VPScalarIVStepsSC:
  case VPPredInstPHISC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
    return true;
  case VPReplicateSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPWidenCallSC:
    return !cast<VPWidenCallRecipe>(this)
                ->getCalledScalarFunction()
                ->onlyWritesMemory();
  case VPBranchOnMaskSC:
  case VPPredInstPHISC:
  case VPScalarIVStepsSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPDerivedIVSC:
  case VPPredInstPHISC:
  case VPScalarCastSC:
    return false;
  case VPInstructionSC:
    switch (cast<VPInstruction>(this)->getOpcode()) {
    case Instruction::Or:
    case Instruction::ICmp:
    case Instruction::Select:
    case VPInstruction::Not:
    case VPInstruction::CalculateTripCountMinusVF:
    case VPInstruction::CanonicalIVIncrementForPart:
    case VPInstruction::PtrAdd:
      return false;
    default:
      return true;
    }
  case VPWidenCallSC: {
    Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
    return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
  }
  case VPBlendSC:
  case VPReductionSC:
  case VPScalarIVStepsSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenPointerInductionSC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPInterleaveSC:
    return mayWriteToMemory();
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    assert(
        cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
            mayWriteToMemory() &&
        "mayHaveSideEffects result for ingredient differs from this "
        "implementation");
    return mayWriteToMemory();
  case VPReplicateSC: {
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    return true;
  }
}

void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
  auto Lane = VPLane::getLastLaneForVF(State.VF);
  VPValue *ExitValue = getOperand(0);
  if (vputils::isUniformAfterVectorization(ExitValue))
    Lane = VPLane::getFirstLane();
  VPBasicBlock *MiddleVPBB =
      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
  assert(MiddleVPBB->getNumSuccessors() == 0 &&
         "the middle block must not have any successors");
  BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
  Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
                   MiddleBB);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
  O << "Live-out ";
  getPhi()->printAsOperand(O);
  O << " = ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << "\n";
}
#endif

void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, InsertPos->getIterator());
}

void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);
  BB.insert(this, I);
}

void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
}

void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
}

iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}

void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}

void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}

FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
  assert(OpType == OperationType::FPMathOp &&
         "recipe doesn't have fast math flags");
  FastMathFlags Res;
  Res.setAllowReassoc(FMFs.AllowReassoc);
  Res.setNoNaNs(FMFs.NoNaNs);
  Res.setNoInfs(FMFs.NoInfs);
  Res.setNoSignedZeros(FMFs.NoSignedZeros);
  Res.setAllowReciprocal(FMFs.AllowReciprocal);
  Res.setAllowContract(FMFs.AllowContract);
  Res.setApproxFunc(FMFs.ApproxFunc);
  return Res;
}

VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
                             VPValue *A, VPValue *B, DebugLoc DL,
                             const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
                          Pred, DL),
      Opcode(Opcode), Name(Name.str()) {
  assert(Opcode == Instruction::ICmp &&
         "only ICmp predicates supported at the moment");
}

VPInstruction::VPInstruction(unsigned Opcode,
                             std::initializer_list<VPValue *> Operands,
                             FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
      Opcode(Opcode), Name(Name.str()) {
  // Make sure the VPInstruction is a floating-point operation.
  assert(isFPMathOp() && "this op can't take fast-math flags");
}

bool VPInstruction::doesGeneratePerAllLanes() const {
  return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
}

bool VPInstruction::canGenerateScalarForFirstLane() const {
  if (Instruction::isBinaryOp(getOpcode()))
    return true;

  switch (Opcode) {
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::ComputeReductionResult:
  case VPInstruction::PtrAdd:
  case VPInstruction::ExplicitVectorLength:
    return true;
  default:
    return false;
  }
}

Value *VPInstruction::generatePerLane(VPTransformState &State,
                                      const VPIteration &Lane) {
  IRBuilderBase &Builder = State.Builder;

  assert(getOpcode() == VPInstruction::PtrAdd &&
         "only PtrAdd opcodes are supported for now");
  return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
                              State.get(getOperand(1), Lane), Name);
}

Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
  IRBuilderBase &Builder = State.Builder;

  if (Instruction::isBinaryOp(getOpcode())) {
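    // E.g. a widened Add at VF=4 emits one "add <4 x i32> %a, %b" per unroll
    // part (illustrative operand types), or a scalar add per part when only
    // the first lane is used.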
    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
    if (Part != 0 && vputils::onlyFirstPartUsed(this))
      return State.get(this, 0, OnlyFirstLaneUsed);

    Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
    Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
    auto *Res =
        Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
    if (auto *I = dyn_cast<Instruction>(Res))
      setFlags(I);
    return Res;
  }

  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0), Part);
    return Builder.CreateNot(A, Name);
  }
  case Instruction::ICmp: {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    return Builder.CreateCmp(getPredicate(), A, B, Name);
  }
  case Instruction::Select: {
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    return Builder.CreateSelect(Cond, Op1, Op2, Name);
  }
  case VPInstruction::ActiveLaneMask: {
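    // Illustrative result for VF=4 with i64 operands:
    //   %alm = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %iv0,
    //                                                            i64 %tc)
    // For scalar VFs a single "icmp ult i64 %iv0, %tc" is emitted instead.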
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop tripcount.
    Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));

    // If this part of the active lane mask is scalar, generate the CMP directly
    // to avoid unnecessary extracts.
    if (State.VF.isScalar())
      return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
                               Name);

    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
                                   {PredTy, ScalarTC->getType()},
                                   {VIVElem0, ScalarTC}, nullptr, Name);
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))

    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy())
      return PartMinus1;
    Value *V2 = State.get(getOperand(1), Part);
    return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
  }
  case VPInstruction::CalculateTripCountMinusVF: {
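    // Computes max(TripCount - VF * UF, 0). Illustrative IR for VF=4, UF=2
    // and an i64 trip count:
    //   %sub = sub i64 %tc, 8
    //   %cmp = icmp ugt i64 %tc, 8
    //   %res = select i1 %cmp, i64 %sub, i64 0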
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    Value *ScalarTC = State.get(getOperand(0), {0, 0});
    Value *Step =
        createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
    Value *Sub = Builder.CreateSub(ScalarTC, Step);
    Value *Cmp =
        Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
    Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
    return Builder.CreateSelect(Cmp, Sub, Zero);
  }
  case VPInstruction::ExplicitVectorLength: {
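    // Illustrative IR, assuming a scalable VF of (vscale x 4) and an i64
    // induction/trip count:
    //   %avl = sub i64 %tc, %iv
    //   %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl,
    //                                                            i32 4, i1 true)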
    // Compute EVL
    auto GetEVL = [=](VPTransformState &State, Value *AVL) {
      assert(AVL->getType()->isIntegerTy() &&
             "Requested vector length should be an integer.");

      // TODO: Add support for MaxSafeDist for correct loop emission.
      assert(State.VF.isScalable() && "Expected scalable vector factor.");
      Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());

      Value *EVL = State.Builder.CreateIntrinsic(
          State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
          {AVL, VFArg, State.Builder.getTrue()});
      return EVL;
    };
    // TODO: Restructure this code with an explicit remainder loop, vsetvli can
    // be outside of the main loop.
    assert(Part == 0 && "No unrolling expected for predicated vectorization.");
    // Compute VTC - IV as the AVL (requested vector length).
    Value *Index = State.get(getOperand(0), VPIteration(0, 0));
    Value *TripCount = State.get(getOperand(1), VPIteration(0, 0));
    Value *AVL = State.Builder.CreateSub(TripCount, Index);
    Value *EVL = GetEVL(State, AVL);
    return EVL;
  }
  case VPInstruction::CanonicalIVIncrementForPart: {
    auto *IV = State.get(getOperand(0), VPIteration(0, 0));
    if (Part == 0)
      return IV;

    // The canonical IV is incremented by the vectorization factor (num of SIMD
    // elements) times the unroll part.
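    // E.g. with VF=4, Part 1 emits "add i64 %iv, 4" (a multiple of vscale
    // instead of a constant for scalable VFs); Part 0 returns the IV as-is.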
    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
                             hasNoSignedWrap());
  }
  case VPInstruction::BranchOnCond: {
    if (Part != 0)
      return nullptr;

    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination for exiting blocks now and
    // to forward destination(s) later when they are created.
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);

    if (getParent()->isExiting())
      CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);

    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
  case VPInstruction::BranchOnCount: {
    if (Part != 0)
      return nullptr;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
    Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination (the header) now and to the
    // forward destination (the exit/middle block) later when it is created.
    // Note that CreateCondBr expects a valid BB as first argument, so we need
    // to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
  case VPInstruction::ComputeReductionResult: {
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
    // and will be removed by breaking up the recipe further.
    auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
    auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
    // Get its reduction variable descriptor.
    const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();

    RecurKind RK = RdxDesc.getRecurrenceKind();

    VPValue *LoopExitingDef = getOperand(1);
    Type *PhiTy = OrigPhi->getType();
    VectorParts RdxParts(State.UF);
    for (unsigned Part = 0; Part < State.UF; ++Part)
      RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());

    // If the vector reduction can be performed in a smaller type, we truncate
    // then extend the loop exit value to enable InstCombine to evaluate the
    // entire expression in the smaller type.
    // TODO: Handle this in truncateToMinBW.
    if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
      Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
      for (unsigned Part = 0; Part < State.UF; ++Part)
        RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
    }
    // Reduce all of the unrolled parts into a single vector.
    Value *ReducedPartRdx = RdxParts[0];
    unsigned Op = RecurrenceDescriptor::getOpcode(RK);
    if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
      Op = Instruction::Or;

    if (PhiR->isOrdered()) {
      ReducedPartRdx = RdxParts[State.UF - 1];
    } else {
      // Floating-point operations should have some FMF to enable the reduction.
      IRBuilderBase::FastMathFlagGuard FMFG(Builder);
      Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
      for (unsigned Part = 1; Part < State.UF; ++Part) {
        Value *RdxPart = RdxParts[Part];
        if (Op != Instruction::ICmp && Op != Instruction::FCmp)
          ReducedPartRdx = Builder.CreateBinOp(
              (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
        else
          ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
      }
    }
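    // E.g. with UF=2 and an fadd reduction, the loop above combines the two
    // unrolled parts into one vector (illustrative):
    //   %bin.rdx = fadd <4 x float> %rdx.part1, %rdx.part0
    // The target reduction below then folds it to a single scalar.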

    // Create the reduction after the loop. Note that inloop reductions create
    // the target reduction in the loop using a Reduction recipe.
    if ((State.VF.isVector() ||
         RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
        !PhiR->isInLoop()) {
      ReducedPartRdx =
          createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
      // If the reduction can be performed in a smaller type, we need to extend
      // the reduction to the wider type before we branch to the original loop.
      if (PhiTy != RdxDesc.getRecurrenceType())
        ReducedPartRdx = RdxDesc.isSigned()
                             ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
                             : Builder.CreateZExt(ReducedPartRdx, PhiTy);
    }

    // If there were stores of the reduction value to a uniform memory address
    // inside the loop, create the final store here.
    if (StoreInst *SI = RdxDesc.IntermediateStore) {
      auto *NewSI = Builder.CreateAlignedStore(
          ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
      propagateMetadata(NewSI, SI);
    }

    return ReducedPartRdx;
  }
  case VPInstruction::PtrAdd: {
    assert(vputils::onlyFirstLaneUsed(this) &&
           "can only generate first lane for PtrAdd");
    Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
    Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
    return Builder.CreatePtrAdd(Ptr, Addend, Name);
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}

#if !defined(NDEBUG)
bool VPInstruction::isFPMathOp() const {
  // Inspired by FPMathOperator::classof. Notable differences are that we don't
  // support Call and PHI opcodes here yet.
  return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
         Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
         Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
         Opcode == Instruction::FCmp || Opcode == Instruction::Select;
}
#endif

void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Instance && "VPInstruction executing an Instance");
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  assert((hasFastMathFlags() == isFPMathOp() ||
          getOpcode() == Instruction::Select) &&
         "Recipe not a FPMathOp but has fast-math flags?");
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());
  State.setDebugLocFrom(getDebugLoc());
  bool GeneratesPerFirstLaneOnly =
      canGenerateScalarForFirstLane() &&
      (vputils::onlyFirstLaneUsed(this) ||
       getOpcode() == VPInstruction::ComputeReductionResult);
  bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    if (GeneratesPerAllLanes) {
      for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
           Lane != NumLanes; ++Lane) {
        Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
        assert(GeneratedValue && "generatePerLane must produce a value");
        State.set(this, GeneratedValue, VPIteration(Part, Lane));
      }
      continue;
    }

    Value *GeneratedValue = generatePerPart(State, Part);
    if (!hasResult())
      continue;
    assert(GeneratedValue && "generatePerPart must produce a value");
    assert((GeneratedValue->getType()->isVectorTy() ==
                !GeneratesPerFirstLaneOnly ||
            State.VF.isScalar()) &&
           "scalar value but not only first lane defined");
    State.set(this, GeneratedValue, Part,
              /*IsScalar*/ GeneratesPerFirstLaneOnly);
  }
}

bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  if (Instruction::isBinaryOp(getOpcode()))
    return vputils::onlyFirstLaneUsed(this);

  switch (getOpcode()) {
  default:
    return false;
  case Instruction::ICmp:
  case VPInstruction::PtrAdd:
    // TODO: Cover additional opcodes.
    return vputils::onlyFirstLaneUsed(this);
  case VPInstruction::ActiveLaneMask:
  case VPInstruction::ExplicitVectorLength:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::BranchOnCount:
    return true;
  }
  llvm_unreachable("switch should return");
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
  print(dbgs(), "", SlotTracker);
}

void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::ExplicitVectorLength:
    O << "EXPLICIT-VECTOR-LENGTH";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::CalculateTripCountMinusVF:
    O << "TC > VF ? TC - VF : 0";
    break;
  case VPInstruction::CanonicalIVIncrementForPart:
    O << "VF * Part +";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count";
    break;
  case VPInstruction::ComputeReductionResult:
    O << "compute-reduction-result";
    break;
  case VPInstruction::PtrAdd:
    O << "ptradd";
    break;
  default:
    O << Instruction::getOpcodeName(getOpcode());
  }

  printFlags(O);
  printOperands(O, SlotTracker);

  if (auto DL = getDebugLoc()) {
    O << ", !dbg ";
    DL.print(O);
  }
}
#endif

void VPWidenCallRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  Function *CalledScalarFn = getCalledScalarFunction();
  assert(!isDbgInfoIntrinsic(CalledScalarFn->getIntrinsicID()) &&
         "DbgInfoIntrinsic should have been dropped during VPlan construction");
  State.setDebugLocFrom(getDebugLoc());

  bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
  FunctionType *VFTy = nullptr;
  if (Variant)
    VFTy = Variant->getFunctionType();
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    SmallVector<Type *, 2> TysForDecl;
    // Add return type if intrinsic is overloaded on it.
    if (UseIntrinsic &&
        isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
      TysForDecl.push_back(VectorType::get(
          CalledScalarFn->getReturnType()->getScalarType(), State.VF));
    SmallVector<Value *, 4> Args;
    for (const auto &I : enumerate(arg_operands())) {
      // Some intrinsics have a scalar argument - don't replace it with a
      // vector.
      Value *Arg;
      if (UseIntrinsic &&
          isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
        Arg = State.get(I.value(), VPIteration(0, 0));
      // Some vectorized function variants may also take a scalar argument,
      // e.g. linear parameters for pointers. This needs to be the scalar value
      // from the start of the respective part when interleaving.
      else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
        Arg = State.get(I.value(), VPIteration(Part, 0));
      else
        Arg = State.get(I.value(), Part);
      if (UseIntrinsic &&
          isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
        TysForDecl.push_back(Arg->getType());
      Args.push_back(Arg);
    }

    Function *VectorF;
    if (UseIntrinsic) {
      // Use vector version of the intrinsic.
      Module *M = State.Builder.GetInsertBlock()->getModule();
      VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
      assert(VectorF && "Can't retrieve vector intrinsic.");
    } else {
      assert(Variant != nullptr && "Can't create vector function.");
      VectorF = Variant;
    }

    auto *CI = cast_or_null<CallInst>(getUnderlyingInstr());
    SmallVector<OperandBundleDef, 1> OpBundles;
    if (CI)
      CI->getOperandBundlesAsDefs(OpBundles);

    CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);

    if (isa<FPMathOperator>(V))
      V->copyFastMathFlags(CI);

    if (!V->getType()->isVoidTy())
      State.set(this, V, Part);
    State.addMetadata(V, CI);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  Function *CalledFn = getCalledScalarFunction();
  if (CalledFn->getReturnType()->isVoidTy())
    O << "void ";
  else {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  O << "call @" << CalledFn->getName() << "(";
  interleaveComma(arg_operands(), O, [&O, &SlotTracker](VPValue *Op) {
    Op->printAsOperand(O, SlotTracker);
  });
  O << ")";

  if (VectorIntrinsicID)
    O << " (using vector intrinsic)";
  else {
    O << " (using library function";
    if (Variant->hasName())
      O << ": " << Variant->getName();
    O << ")";
  }
}

void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-SELECT ";
  printAsOperand(O, SlotTracker);
  O << " = select ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(2)->printAsOperand(O, SlotTracker);
  O << (isInvariantCond() ? " (condition is loop invariant)" : "");
}
#endif

void VPWidenSelectRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());

  // The condition can be loop invariant but still defined inside the
  // loop. This means that we can't just use the original 'cond' value.
  // We have to take the 'vectorized' value and pick the first lane.
  // Instcombine will make this a no-op.
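  // E.g. (illustrative) for an invariant condition defined in the loop, lane 0
  // is extracted once:
  //   %cond0 = extractelement <4 x i1> %cond.vec, i64 0
  // and each part emits a single "select i1 %cond0, ..." using it.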
  auto *InvarCond =
      isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
    Value *Op0 = State.get(getOperand(1), Part);
    Value *Op1 = State.get(getOperand(2), Part);
    Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
    State.set(this, Sel, Part);
    State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
  }
}

VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
    const FastMathFlags &FMF) {
  AllowReassoc = FMF.allowReassoc();
  NoNaNs = FMF.noNaNs();
  NoInfs = FMF.noInfs();
  NoSignedZeros = FMF.noSignedZeros();
  AllowReciprocal = FMF.allowReciprocal();
  AllowContract = FMF.allowContract();
  ApproxFunc = FMF.approxFunc();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
  switch (OpType) {
  case OperationType::Cmp:
    O << " " << CmpInst::getPredicateName(getPredicate());
    break;
  case OperationType::DisjointOp:
    if (DisjointFlags.IsDisjoint)
      O << " disjoint";
    break;
  case OperationType::PossiblyExactOp:
    if (ExactFlags.IsExact)
      O << " exact";
    break;
  case OperationType::OverflowingBinOp:
    if (WrapFlags.HasNUW)
      O << " nuw";
    if (WrapFlags.HasNSW)
      O << " nsw";
    break;
  case OperationType::FPMathOp:
    getFastMathFlags().print(O);
    break;
  case OperationType::GEPOp:
    if (GEPFlags.IsInBounds)
      O << " inbounds";
    break;
  case OperationType::NonNegOp:
    if (NonNegFlags.NonNeg)
      O << " nneg";
    break;
  case OperationType::Other:
    break;
  }
  if (getNumOperands() > 0)
    O << " ";
}
#endif

void VPWidenRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  switch (Opcode) {
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Just widen unops and binops.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      SmallVector<Value *, 2> Ops;
      for (VPValue *VPOp : operands())
        Ops.push_back(State.get(VPOp, Part));

      Value *V = Builder.CreateNAryOp(Opcode, Ops);

      if (auto *VecOp = dyn_cast<Instruction>(V))
        setFlags(VecOp);

      // Use this vector value for all users of the original instruction.
      State.set(this, V, Part);
      State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  case Instruction::Freeze: {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *Op = State.get(getOperand(0), Part);

      Value *Freeze = Builder.CreateFreeze(Op);
      State.set(this, Freeze, Part);
    }
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Widen compares. Generate vector compares.
    bool FCmp = Opcode == Instruction::FCmp;
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *B = State.get(getOperand(1), Part);
      Value *C = nullptr;
      if (FCmp) {
        // Propagate fast math flags.
        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
        if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
          Builder.setFastMathFlags(I->getFastMathFlags());
        C = Builder.CreateFCmp(getPredicate(), A, B);
      } else {
        C = Builder.CreateICmp(getPredicate(), A, B);
      }
      State.set(this, C, Part);
      State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  default:
    // This instruction is not vectorized by simple widening.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
                      << Instruction::getOpcodeName(Opcode));
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.

#if !defined(NDEBUG)
  // Verify that VPlan type inference results agree with the type of the
  // generated values.
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    assert(VectorType::get(State.TypeAnalysis.inferScalarType(this),
                           State.VF) == State.get(this, Part)->getType() &&
           "inferred type and type from generated instructions do not match");
  }
#endif
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = " << Instruction::getOpcodeName(Opcode);
  printFlags(O);
  printOperands(O, SlotTracker);
}
#endif

void VPWidenCastRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  // Vectorize casts.
  assert(State.VF.isVector() && "Not vectorizing?");
  Type *DestTy = VectorType::get(getResultType(), State.VF);
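  // E.g. (illustrative) a scalar "zext i8 %x to i32" at VF=4 becomes one
  //   %cast = zext <4 x i8> %a to <4 x i32>
  // per unroll part.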
  VPValue *Op = getOperand(0);
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    if (Part > 0 && Op->isLiveIn()) {
      // FIXME: Remove once explicit unrolling is implemented using VPlan.
      State.set(this, State.get(this, 0), Part);
      continue;
    }
    Value *A = State.get(Op, Part);
    Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
    State.set(this, Cast, Part);
    State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CAST ";
  printAsOperand(O, SlotTracker);
  O << " = " << Instruction::getOpcodeName(Opcode) << " ";
  printFlags(O);
  printOperands(O, SlotTracker);
  O << " to " << *getResultType();
}
#endif

/// This function adds
/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
/// to each vector element of Val. The sequence starts at StartIdx.
/// \p Opcode is relevant for FP induction variables.
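/// For example (illustrative), for Val = splat(%v) : <4 x i32>, StartIdx = 0
/// and Step = %s, the result is
///   <%v + 0 * %s, %v + 1 * %s, %v + 2 * %s, %v + 3 * %s>.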
static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
                            Instruction::BinaryOps BinOp, ElementCount VF,
                            IRBuilderBase &Builder) {
  assert(VF.isVector() && "only vector VFs are supported");

  // Create and check the types.
  auto *ValVTy = cast<VectorType>(Val->getType());
  ElementCount VLen = ValVTy->getElementCount();

  Type *STy = Val->getType()->getScalarType();
  assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
         "Induction Step must be an integer or FP");
  assert(Step->getType() == STy && "Step has wrong type");

  // Create a vector of consecutive numbers from zero to VF.
  VectorType *InitVecValVTy = ValVTy;
  if (STy->isFloatingPointTy()) {
    Type *InitVecValSTy =
        IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
    InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
  }
  Value *InitVec = Builder.CreateStepVector(InitVecValVTy);

  // Splat the StartIdx
  Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);

  if (STy->isIntegerTy()) {
    InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
    Step = Builder.CreateVectorSplat(VLen, Step);
    assert(Step->getType() == Val->getType() && "Invalid step vec");
    // FIXME: The newly created binary instructions should contain nsw/nuw
    // flags, which can be found from the original scalar operations.
    Step = Builder.CreateMul(InitVec, Step);
    return Builder.CreateAdd(Val, Step, "induction");
  }

  // Floating point induction.
  assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
         "Binary Opcode should be specified for FP induction");
  InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
  InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);

  Step = Builder.CreateVectorSplat(VLen, Step);
  Value *MulOp = Builder.CreateFMul(InitVec, Step);
  return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
}

/// A helper function that returns an integer or floating-point constant with
/// value C.
static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
  return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
                           : ConstantFP::get(Ty, C);
}

static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,
                                  ElementCount VF) {
  assert(FTy->isFloatingPointTy() && "Expected floating point type!");
  Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
  Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
  return B.CreateUIToFP(RuntimeVF, FTy);
}

void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Int or FP induction being replicated.");

  Value *Start = getStartValue()->getLiveInIRValue();
  const InductionDescriptor &ID = getInductionDescriptor();
  TruncInst *Trunc = getTruncInst();
  IRBuilderBase &Builder = State.Builder;
  assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
  assert(State.VF.isVector() && "must have vector VF");

  // The value from the original loop to which we are mapping the new induction
  // variable.
  Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;

  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
    Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());

  // Now do the actual transformations, and start with fetching the step value.
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));

  assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
         "Expected either an induction phi-node or a truncate of it!");

  // Construct the initial value of the vector IV in the vector loop preheader
  auto CurrIP = Builder.saveIP();
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  Builder.SetInsertPoint(VectorPH->getTerminator());
  if (isa<TruncInst>(EntryVal)) {
    assert(Start->getType()->isIntegerTy() &&
           "Truncation requires an integer type");
    auto *TruncType = cast<IntegerType>(EntryVal->getType());
    Step = Builder.CreateTrunc(Step, TruncType);
    Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
  }

  Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
  Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
  Value *SteppedStart = getStepVector(
      SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);

  // We create vector phi nodes for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (Step->getType()->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = ID.getInductionOpcode();
    MulOp = Instruction::FMul;
  }

  // Multiply the vectorization factor by the step using integer or
  // floating-point arithmetic as appropriate.
  Type *StepType = Step->getType();
  Value *RuntimeVF;
  if (Step->getType()->isFloatingPointTy())
    RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
  else
    RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
  Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);

  // Create a vector splat to use in the induction update.
  //
  // FIXME: If the step is non-constant, we create the vector splat with
  //        IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
  //        handle a constant vector splat.
  Value *SplatVF = isa<Constant>(Mul)
                       ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
                       : Builder.CreateVectorSplat(State.VF, Mul);
  Builder.restoreIP(CurrIP);

  // We may need to add the step a number of times, depending on the unroll
  // factor. The last of those goes into the PHI.
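  // E.g. for VF=4, UF=2 and an integer IV with step %s this builds
  // (illustrative):
  //   %vec.ind      = phi <4 x i32> [ %stepped.start, %vector.ph ],
  //                                 [ %vec.ind.next, %vector.body ]
  //   %step.add     = add <4 x i32> %vec.ind, splat(4 * %s)   ; Part 1 value
  //   %vec.ind.next = add <4 x i32> %step.add, splat(4 * %s)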
  PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
  VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  VecInd->setDebugLoc(EntryVal->getDebugLoc());
  Instruction *LastInduction = VecInd;
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    State.set(this, LastInduction, Part);

    if (isa<TruncInst>(EntryVal))
      State.addMetadata(LastInduction, EntryVal);

    LastInduction = cast<Instruction>(
        Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
    LastInduction->setDebugLoc(EntryVal->getDebugLoc());
  }

  LastInduction->setName("vec.ind.next");
  VecInd->addIncoming(SteppedStart, VectorPH);
  // Add induction update using an incorrect block temporarily. The phi node
  // will be fixed after VPlan execution. Note that at this point the latch
  // block cannot be used, as it does not exist yet.
  // TODO: Model increment value in VPlan, by turning the recipe into a
  // multi-def and a subclass of VPHeaderPHIRecipe.
  VecInd->addIncoming(LastInduction, VectorPH);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-INDUCTION";
  if (getTruncInst()) {
    O << "\\l\"";
    O << " +\n" << Indent << "\"  " << VPlanIngredient(IV) << "\\l\"";
    O << " +\n" << Indent << "\"  ";
    getVPValue(0)->printAsOperand(O, SlotTracker);
  } else
    O << " " << VPlanIngredient(IV);

  O << ", ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
#endif

bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
  // The step may be defined by a recipe in the preheader (e.g. if it requires
  // SCEV expansion), but for the canonical induction the step is required to be
  // 1, which is represented as live-in.
  if (getStepValue()->getDefiningRecipe())
    return false;
  auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
  auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
  auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
  return StartC && StartC->isZero() && StepC && StepC->isOne() &&
         getScalarType() == CanIV->getScalarType();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = DERIVED-IV ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << " + ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << " * ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
#endif

void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());

  // Compute scalar induction steps. BaseIV is the scalar induction variable on
  // which to base the steps; Step is the size of the step.

  Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));
  IRBuilderBase &Builder = State.Builder;

  // Ensure step has the same type as that of scalar IV.
  Type *BaseIVTy = BaseIV->getType()->getScalarType();
  assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");

  // We build scalar steps for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (BaseIVTy->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = InductionOpcode;
    MulOp = Instruction::FMul;
  }

  // Determine the number of scalars we need to generate for each unroll
  // iteration.
  bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
  // Compute the scalar steps and save the results in State.
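  // E.g. for a fixed VF=4 with an integer IV, Part 1 produces the scalars
  // %base + (4 + Lane) * %step for Lane = 0..3 (illustrative).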
  Type *IntStepTy =
      IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
  Type *VecIVTy = nullptr;
  Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
  if (!FirstLaneOnly && State.VF.isScalable()) {
    VecIVTy = VectorType::get(BaseIVTy, State.VF);
    UnitStepVec =
        Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
    SplatStep = Builder.CreateVectorSplat(State.VF, Step);
    SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
  }

  unsigned StartPart = 0;
  unsigned EndPart = State.UF;
  unsigned StartLane = 0;
  unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
  if (State.Instance) {
    StartPart = State.Instance->Part;
    EndPart = StartPart + 1;
    StartLane = State.Instance->Lane.getKnownLane();
    EndLane = StartLane + 1;
  }
  for (unsigned Part = StartPart; Part < EndPart; ++Part) {
    Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);

    if (!FirstLaneOnly && State.VF.isScalable()) {
      auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
      auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
      if (BaseIVTy->isFloatingPointTy())
        InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
      auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
      auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
      State.set(this, Add, Part);
      // It's useful to record the lane values too for the known minimum number
      // of elements so we do those below. This improves the code quality when
      // trying to extract the first element, for example.
    }

    if (BaseIVTy->isFloatingPointTy())
      StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);

    for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
      Value *StartIdx = Builder.CreateBinOp(
          AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
      // The step returned by `createStepForVF` is a runtime-evaluated value
      // when VF is scalable. Otherwise, it should be folded into a Constant.
      assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
             "Expected StartIdx to be folded to a constant when VF is not "
             "scalable");
      auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
      auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
      State.set(this, Add, VPIteration(Part, Lane));
    }
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = SCALAR-STEPS ";
  printOperands(O, SlotTracker);
}
#endif

void VPWidenGEPRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
  // Construct a vector GEP by widening the operands of the scalar GEP as
  // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
  // results in a vector of pointers when at least one operand of the GEP
  // is vector-typed. Thus, to keep the representation compact, we only use
  // vector-typed operands for loop-varying values.

  if (areAllOperandsInvariant()) {
    // If we are vectorizing, but the GEP has only loop-invariant operands,
    // the GEP we build (by only using vector-typed operands for
    // loop-varying values) would be a scalar pointer. Thus, to ensure we
    // produce a vector of pointers, we need to either arbitrarily pick an
    // operand to broadcast, or broadcast a clone of the original GEP.
    // Here, we broadcast a clone of the original.
    //
    // TODO: If at some point we decide to scalarize instructions having
    //       loop-invariant operands, this special case will no longer be
    //       required. We would add the scalarization decision to
    //       collectLoopScalars() and teach getVectorValue() to broadcast
    //       the lane-zero scalar value.
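    // For example (illustrative), a fully invariant
    //   %g = getelementptr i32, ptr %base, i64 %idx
    // becomes a single scalar GEP whose result is splatted to a <VF x ptr>
    // vector for each unroll part.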
1364     SmallVector<Value *> Ops;
1365     for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1366       Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));
1367 
1368     auto *NewGEP =
1369         State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
1370                                 ArrayRef(Ops).drop_front(), "", isInBounds());
1371     for (unsigned Part = 0; Part < State.UF; ++Part) {
1372       Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1373       State.set(this, EntryPart, Part);
1374       State.addMetadata(EntryPart, GEP);
1375     }
1376   } else {
1377     // If the GEP has at least one loop-varying operand, we are sure to
1378     // produce a vector of pointers. But if we are only unrolling, we want
1379     // to produce a scalar GEP for each unroll part. Thus, the GEP we
1380     // produce with the code below will be scalar (if VF == 1) or vector
1381     // (otherwise). Note that for the unroll-only case, we still maintain
1382     // values in the vector mapping with initVector, as we do for other
1383     // instructions.
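         // Illustrative example: for "getelementptr i32, ptr %a, i64 %i" with an
         // invariant base %a and a loop-varying index %i, %a stays scalar while the
         // widened <VF x i64> value of %i is used, yielding a <VF x ptr> GEP.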
1384     for (unsigned Part = 0; Part < State.UF; ++Part) {
1385       // The pointer operand of the new GEP. If it's loop-invariant, we
1386       // won't broadcast it.
1387       auto *Ptr = isPointerLoopInvariant()
1388                       ? State.get(getOperand(0), VPIteration(0, 0))
1389                       : State.get(getOperand(0), Part);
1390 
1391       // Collect all the indices for the new GEP. If any index is
1392       // loop-invariant, we won't broadcast it.
1393       SmallVector<Value *, 4> Indices;
1394       for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1395         VPValue *Operand = getOperand(I);
1396         if (isIndexLoopInvariant(I - 1))
1397           Indices.push_back(State.get(Operand, VPIteration(0, 0)));
1398         else
1399           Indices.push_back(State.get(Operand, Part));
1400       }
1401 
1402       // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
1403       // but should be a vector otherwise.
1404       auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
1405                                              Indices, "", isInBounds());
1406       assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
1407              "NewGEP is not a pointer vector");
1408       State.set(this, NewGEP, Part);
1409       State.addMetadata(NewGEP, GEP);
1410     }
1411   }
1412 }
1413 
1414 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1415 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
1416                              VPSlotTracker &SlotTracker) const {
1417   O << Indent << "WIDEN-GEP ";
1418   O << (isPointerLoopInvariant() ? "Inv" : "Var");
1419   for (size_t I = 0; I < getNumOperands() - 1; ++I)
1420     O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
1421 
1422   O << " ";
1423   printAsOperand(O, SlotTracker);
1424   O << " = getelementptr";
1425   printFlags(O);
1426   printOperands(O, SlotTracker);
1427 }
1428 #endif
1429 
1430 void VPVectorPointerRecipe::execute(VPTransformState &State) {
1431   auto &Builder = State.Builder;
1432   State.setDebugLocFrom(getDebugLoc());
1433   for (unsigned Part = 0; Part < State.UF; ++Part) {
1434     // Calculate the pointer for the specific unroll-part.
1435     Value *PartPtr = nullptr;
1436     // Use i32 for the gep index type when the value is constant,
1437     // or query DataLayout for a more suitable index type otherwise.
1438     const DataLayout &DL =
1439         Builder.GetInsertBlock()->getModule()->getDataLayout();
1440     Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
1441                         ? DL.getIndexType(IndexedTy->getPointerTo())
1442                         : Builder.getInt32Ty();
1443     Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
1444     bool InBounds = isInBounds();
1445     if (IsReverse) {
1446       // If the address is consecutive but reversed, then the
1447       // wide store needs to start at the last vector element.
1448       // RunTimeVF = VScale * VF.getKnownMinValue()
1449       // For fixed-width vectors, VScale is 1, so RunTimeVF = VF.getKnownMinValue()
1450       Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
1451       // NumElt = -Part * RunTimeVF
1452       Value *NumElt = Builder.CreateMul(
1453           ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
1454       // LastLane = 1 - RunTimeVF
1455       Value *LastLane =
1456           Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
1457       PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
1458       PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds);
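           // Net effect (sketch): for fixed-width VF=4 and Part=1, RunTimeVF=4,
           // NumElt=-4 and LastLane=-3, so PartPtr points 7 elements before Ptr:
           // the lowest-addressed element of the reversed part-1 vector.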
1459     } else {
1460       Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
1461       PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
1462     }
1463 
1464     State.set(this, PartPtr, Part, /*IsScalar*/ true);
1465   }
1466 }
1467 
1468 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1469 void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
1470                                   VPSlotTracker &SlotTracker) const {
1471   O << Indent;
1472   printAsOperand(O, SlotTracker);
1473   O << " = vector-pointer ";
1474   if (IsReverse)
1475     O << "(reverse) ";
1476 
1477   printOperands(O, SlotTracker);
1478 }
1479 #endif
1480 
1481 void VPBlendRecipe::execute(VPTransformState &State) {
1482   State.setDebugLocFrom(getDebugLoc());
1483   // We know that all PHIs in non-header blocks are converted into
1484   // selects, so we don't have to worry about the insertion order and we
1485   // can just use the builder.
1486   // At this point we generate the predication tree. There may be
1487   // duplications since this is a simple recursive scan, but future
1488   // optimizations will clean it up.
1489 
1490   unsigned NumIncoming = getNumIncomingValues();
1491 
1492   // Generate a sequence of selects of the form:
1493   // SELECT(Mask3, In3,
1494   //        SELECT(Mask2, In2,
1495   //               SELECT(Mask1, In1,
1496   //                      In0)))
1497   // Note that Mask0 is never used: lanes for which no path reaches this phi
1498   // are essentially undef and take their value from In0.
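       // For example (illustrative names), with three incoming values this emits
       // two selects per part:
       //   %sel1    = select %mask1, %in1, %in0
       //   %predphi = select %mask2, %in2, %sel1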
1499   VectorParts Entry(State.UF);
1500   for (unsigned In = 0; In < NumIncoming; ++In) {
1501     for (unsigned Part = 0; Part < State.UF; ++Part) {
1502       // We might have single-edge PHIs (blocks with one predecessor); use an
1503       // identity 'select' for the first PHI operand.
1504       Value *In0 = State.get(getIncomingValue(In), Part);
1505       if (In == 0)
1506         Entry[Part] = In0; // Initialize with the first incoming value.
1507       else {
1508         // Select between the current value and the previous incoming edge
1509         // based on the incoming mask.
1510         Value *Cond = State.get(getMask(In), Part);
1511         Entry[Part] =
1512             State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
1513       }
1514     }
1515   }
1516   for (unsigned Part = 0; Part < State.UF; ++Part)
1517     State.set(this, Entry[Part], Part);
1518 }
1519 
1520 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1521 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
1522                           VPSlotTracker &SlotTracker) const {
1523   O << Indent << "BLEND ";
1524   printAsOperand(O, SlotTracker);
1525   O << " =";
1526   if (getNumIncomingValues() == 1) {
1527     // Not a User of any mask: not really blending, this is a
1528     // single-predecessor phi.
1529     O << " ";
1530     getIncomingValue(0)->printAsOperand(O, SlotTracker);
1531   } else {
1532     for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
1533       O << " ";
1534       getIncomingValue(I)->printAsOperand(O, SlotTracker);
1535       if (I == 0)
1536         continue;
1537       O << "/";
1538       getMask(I)->printAsOperand(O, SlotTracker);
1539     }
1540   }
1541 }
1542 #endif
1543 
1544 void VPReductionRecipe::execute(VPTransformState &State) {
1545   assert(!State.Instance && "Reduction being replicated.");
1546   Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
1547   RecurKind Kind = RdxDesc.getRecurrenceKind();
1548   // Propagate the fast-math flags carried by the underlying instruction.
1549   IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
1550   State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
1551   for (unsigned Part = 0; Part < State.UF; ++Part) {
1552     Value *NewVecOp = State.get(getVecOp(), Part);
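         // If the reduction is conditional, masked-off lanes must not affect the
         // result, so they are replaced below by the reduction's identity value,
         // e.g. (sketch) "select %mask, %vec, splat(0)" for an integer add.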
1553     if (VPValue *Cond = getCondOp()) {
1554       Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
1555       VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
1556       Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
1557       Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
1558                                                   RdxDesc.getFastMathFlags());
1559       if (State.VF.isVector()) {
1560         Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
1561       }
1562 
1563       Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
1564       NewVecOp = Select;
1565     }
1566     Value *NewRed;
1567     Value *NextInChain;
1568     if (IsOrdered) {
1569       if (State.VF.isVector())
1570         NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
1571                                         PrevInChain);
1572       else
1573         NewRed = State.Builder.CreateBinOp(
1574             (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
1575             NewVecOp);
1576       PrevInChain = NewRed;
1577     } else {
1578       PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
1579       NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
1580     }
1581     if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
1582       NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
1583                                    NewRed, PrevInChain);
1584     } else if (IsOrdered)
1585       NextInChain = NewRed;
1586     else
1587       NextInChain = State.Builder.CreateBinOp(
1588           (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
1589     State.set(this, NextInChain, Part, /*IsScalar*/ true);
1590   }
1591 }
1592 
1593 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1594 void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
1595                               VPSlotTracker &SlotTracker) const {
1596   O << Indent << "REDUCE ";
1597   printAsOperand(O, SlotTracker);
1598   O << " = ";
1599   getChainOp()->printAsOperand(O, SlotTracker);
1600   O << " +";
1601   if (isa<FPMathOperator>(getUnderlyingInstr()))
1602     O << getUnderlyingInstr()->getFastMathFlags();
1603   O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
1604   getVecOp()->printAsOperand(O, SlotTracker);
1605   if (getCondOp()) {
1606     O << ", ";
1607     getCondOp()->printAsOperand(O, SlotTracker);
1608   }
1609   O << ")";
1610   if (RdxDesc.IntermediateStore)
1611     O << " (with final reduction value stored in invariant address sank "
1612          "outside of loop)";
1613 }
1614 #endif
1615 
1616 bool VPReplicateRecipe::shouldPack() const {
1617   // Check whether the recipe is used by a widened recipe via an intervening
1618   // VPPredInstPHIRecipe. In that case, also pack the scalar values in a vector.
1619   return any_of(users(), [](const VPUser *U) {
1620     if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
1621       return any_of(PredR->users(), [PredR](const VPUser *U) {
1622         return !U->usesScalars(PredR);
1623       });
1624     return false;
1625   });
1626 }
1627 
1628 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1629 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
1630                               VPSlotTracker &SlotTracker) const {
1631   O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
1632 
1633   if (!getUnderlyingInstr()->getType()->isVoidTy()) {
1634     printAsOperand(O, SlotTracker);
1635     O << " = ";
1636   }
1637   if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
1638     O << "call";
1639     printFlags(O);
1640     O << "@" << CB->getCalledFunction()->getName() << "(";
1641     interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
1642                     O, [&O, &SlotTracker](VPValue *Op) {
1643                       Op->printAsOperand(O, SlotTracker);
1644                     });
1645     O << ")";
1646   } else {
1647     O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode());
1648     printFlags(O);
1649     printOperands(O, SlotTracker);
1650   }
1651 
1652   if (shouldPack())
1653     O << " (S->V)";
1654 }
1655 #endif
1656 
1657 /// Checks if \p C is uniform across all VFs and UFs. It is considered as such
1658 /// if it is either defined outside the vector region or its operand is known to
1659 /// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
1660 /// TODO: Uniformity should be associated with a VPValue and there should be a
1661 /// generic way to check.
1662 static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) {
1663   return C->isDefinedOutsideVectorRegions() ||
1664          isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
1665          isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
1666 }
1667 
1668 Value *VPScalarCastRecipe::generate(VPTransformState &State, unsigned Part) {
1669   assert(vputils::onlyFirstLaneUsed(this) &&
1670          "Codegen only implemented for first lane.");
1671   switch (Opcode) {
1672   case Instruction::SExt:
1673   case Instruction::ZExt:
1674   case Instruction::Trunc: {
1675     // Note: SExt/ZExt not used yet.
1676     Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
1677     return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
1678   }
1679   default:
1680     llvm_unreachable("opcode not implemented yet");
1681   }
1682 }
1683 
1684 void VPScalarCastRecipe::execute(VPTransformState &State) {
1685   bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
1686   for (unsigned Part = 0; Part != State.UF; ++Part) {
1687     Value *Res;
1688     // Only generate a single instance if the recipe is uniform across UFs and
1689     // VFs.
1690     if (Part > 0 && IsUniformAcrossVFsAndUFs)
1691       Res = State.get(this, VPIteration(0, 0));
1692     else
1693       Res = generate(State, Part);
1694     State.set(this, Res, VPIteration(Part, 0));
1695   }
1696 }
1697 
1698 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1699 void VPScalarCastRecipe::print(raw_ostream &O, const Twine &Indent,
1700                                VPSlotTracker &SlotTracker) const {
1701   O << Indent << "SCALAR-CAST ";
1702   printAsOperand(O, SlotTracker);
1703   O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1704   printOperands(O, SlotTracker);
1705   O << " to " << *ResultTy;
1706 }
1707 #endif
1708 
1709 void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
1710   assert(State.Instance && "Branch on Mask works only on single instance.");
1711 
1712   unsigned Part = State.Instance->Part;
1713   unsigned Lane = State.Instance->Lane.getKnownLane();
1714 
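       // Compute the scalar condition bit for this (Part, Lane): extract it from
       // the widened mask if one exists; without a mask the block executes
       // unconditionally, so the bit is simply 'true'.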
1715   Value *ConditionBit = nullptr;
1716   VPValue *BlockInMask = getMask();
1717   if (BlockInMask) {
1718     ConditionBit = State.get(BlockInMask, Part);
1719     if (ConditionBit->getType()->isVectorTy())
1720       ConditionBit = State.Builder.CreateExtractElement(
1721           ConditionBit, State.Builder.getInt32(Lane));
1722   } else // Block in mask is all-one.
1723     ConditionBit = State.Builder.getTrue();
1724 
1725   // Replace the temporary unreachable terminator with a new conditional branch,
1726   // whose two destinations will be set later when they are created.
1727   auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
1728   assert(isa<UnreachableInst>(CurrentTerminator) &&
1729          "Expected to replace unreachable terminator with conditional branch.");
1730   auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
1731   CondBr->setSuccessor(0, nullptr);
1732   ReplaceInstWithInst(CurrentTerminator, CondBr);
1733 }
1734 
1735 void VPPredInstPHIRecipe::execute(VPTransformState &State) {
1736   assert(State.Instance && "Predicated instruction PHI works per instance.");
1737   Instruction *ScalarPredInst =
1738       cast<Instruction>(State.get(getOperand(0), *State.Instance));
1739   BasicBlock *PredicatedBB = ScalarPredInst->getParent();
1740   BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
1741   assert(PredicatingBB && "Predicated block has no single predecessor.");
1742   assert(isa<VPReplicateRecipe>(getOperand(0)) &&
1743          "operand must be VPReplicateRecipe");
1744 
1745   // By current pack/unpack logic we need to generate only a single phi node: if
1746   // a vector value for the predicated instruction exists at this point it means
1747   // the instruction has vector users only, and a phi for the vector value is
1748   // needed. In this case the recipe of the predicated instruction is marked to
1749   // also do that packing, thereby "hoisting" the insert-element sequence.
1750   // Otherwise, a phi node for the scalar value is needed.
1751   unsigned Part = State.Instance->Part;
1752   if (State.hasVectorValue(getOperand(0), Part)) {
1753     Value *VectorValue = State.get(getOperand(0), Part);
1754     InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
1755     PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
1756     VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
1757     VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
1758     if (State.hasVectorValue(this, Part))
1759       State.reset(this, VPhi, Part);
1760     else
1761       State.set(this, VPhi, Part);
1762     // NOTE: Currently we need to update the value of the operand, so the next
1763     // predicated iteration inserts its generated value in the correct vector.
1764     State.reset(getOperand(0), VPhi, Part);
1765   } else {
1766     Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
1767     PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
1768     Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
1769                      PredicatingBB);
1770     Phi->addIncoming(ScalarPredInst, PredicatedBB);
1771     if (State.hasScalarValue(this, *State.Instance))
1772       State.reset(this, Phi, *State.Instance);
1773     else
1774       State.set(this, Phi, *State.Instance);
1775     // NOTE: Currently we need to update the value of the operand, so the next
1776     // predicated iteration inserts its generated value in the correct vector.
1777     State.reset(getOperand(0), Phi, *State.Instance);
1778   }
1779 }
1780 
1781 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1782 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1783                                 VPSlotTracker &SlotTracker) const {
1784   O << Indent << "PHI-PREDICATED-INSTRUCTION ";
1785   printAsOperand(O, SlotTracker);
1786   O << " = ";
1787   printOperands(O, SlotTracker);
1788 }
1789 
1790 void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
1791                               VPSlotTracker &SlotTracker) const {
1792   O << Indent << "WIDEN ";
1793   printAsOperand(O, SlotTracker);
1794   O << " = load ";
1795   printOperands(O, SlotTracker);
1796 }
1797 
1798 void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
1799                                  VPSlotTracker &SlotTracker) const {
1800   O << Indent << "WIDEN ";
1801   printAsOperand(O, SlotTracker);
1802   O << " = vp.load ";
1803   printOperands(O, SlotTracker);
1804 }
1805 
1806 void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
1807                                VPSlotTracker &SlotTracker) const {
1808   O << Indent << "WIDEN store ";
1809   printOperands(O, SlotTracker);
1810 }
1811 
1812 void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent,
1813                                   VPSlotTracker &SlotTracker) const {
1814   O << Indent << "WIDEN vp.store ";
1815   printOperands(O, SlotTracker);
1816 }
1817 #endif
1818 
1819 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
1820   Value *Start = getStartValue()->getLiveInIRValue();
1821   PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
1822   EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1823 
1824   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1825   EntryPart->addIncoming(Start, VectorPH);
1826   EntryPart->setDebugLoc(getDebugLoc());
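       // All unroll parts share this single scalar phi; users such as
       // VPWidenCanonicalIVRecipe apply their own per-part offsets.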
1827   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1828     State.set(this, EntryPart, Part, /*IsScalar*/ true);
1829 }
1830 
1831 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1832 void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1833                                    VPSlotTracker &SlotTracker) const {
1834   O << Indent << "EMIT ";
1835   printAsOperand(O, SlotTracker);
1836   O << " = CANONICAL-INDUCTION ";
1837   printOperands(O, SlotTracker);
1838 }
1839 #endif
1840 
1841 bool VPCanonicalIVPHIRecipe::isCanonical(
1842     InductionDescriptor::InductionKind Kind, VPValue *Start,
1843     VPValue *Step) const {
1844   // Must be an integer induction.
1845   if (Kind != InductionDescriptor::IK_IntInduction)
1846     return false;
1847   // Start must match the start value of this canonical induction.
1848   if (Start != getStartValue())
1849     return false;
1850 
1851   // If the step is defined by a recipe, it is not a ConstantInt.
1852   if (Step->getDefiningRecipe())
1853     return false;
1854 
1855   ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
1856   return StepC && StepC->isOne();
1857 }
1858 
1859 bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
1860   return IsScalarAfterVectorization &&
1861          (!IsScalable || vputils::onlyFirstLaneUsed(this));
1862 }
1863 
1864 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1865 void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
1866                                           VPSlotTracker &SlotTracker) const {
1867   O << Indent << "EMIT ";
1868   printAsOperand(O, SlotTracker);
1869   O << " = WIDEN-POINTER-INDUCTION ";
1870   getStartValue()->printAsOperand(O, SlotTracker);
1871   O << ", " << *IndDesc.getStep();
1872 }
1873 #endif
1874 
1875 void VPExpandSCEVRecipe::execute(VPTransformState &State) {
1876   assert(!State.Instance && "cannot be used in per-lane");
1877   const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
1878   SCEVExpander Exp(SE, DL, "induction");
1879 
1880   Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
1881                                  &*State.Builder.GetInsertPoint());
1882   assert(!State.ExpandedSCEVs.contains(Expr) &&
1883          "Same SCEV expanded multiple times");
1884   State.ExpandedSCEVs[Expr] = Res;
1885   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1886     State.set(this, Res, {Part, 0});
1887 }
1888 
1889 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1890 void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
1891                                VPSlotTracker &SlotTracker) const {
1892   O << Indent << "EMIT ";
1893   getVPSingleValue()->printAsOperand(O, SlotTracker);
1894   O << " = EXPAND SCEV " << *Expr;
1895 }
1896 #endif
1897 
1898 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
1899   Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
1900   Type *STy = CanonicalIV->getType();
1901   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
1902   ElementCount VF = State.VF;
1903   Value *VStart = VF.isScalar()
1904                       ? CanonicalIV
1905                       : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
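       // Worked example (fixed-width VF=4, Part=1): VStep = 4, splat to <4,4,4,4>,
       // plus the step vector <0,1,2,3> gives <4,5,6,7>; vec.iv = VStart + <4,5,6,7>.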
1906   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1907     Value *VStep = createStepForVF(Builder, STy, VF, Part);
1908     if (VF.isVector()) {
1909       VStep = Builder.CreateVectorSplat(VF, VStep);
1910       VStep =
1911           Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
1912     }
1913     Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
1914     State.set(this, CanonicalVectorIV, Part);
1915   }
1916 }
1917 
1918 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1919 void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
1920                                      VPSlotTracker &SlotTracker) const {
1921   O << Indent << "EMIT ";
1922   printAsOperand(O, SlotTracker);
1923   O << " = WIDEN-CANONICAL-INDUCTION ";
1924   printOperands(O, SlotTracker);
1925 }
1926 #endif
1927 
1928 void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
1929   auto &Builder = State.Builder;
1930   // Create a vector from the initial value.
1931   auto *VectorInit = getStartValue()->getLiveInIRValue();
1932 
1933   Type *VecTy = State.VF.isScalar()
1934                     ? VectorInit->getType()
1935                     : VectorType::get(VectorInit->getType(), State.VF);
1936 
1937   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1938   if (State.VF.isVector()) {
1939     auto *IdxTy = Builder.getInt32Ty();
1940     auto *One = ConstantInt::get(IdxTy, 1);
1941     IRBuilder<>::InsertPointGuard Guard(Builder);
1942     Builder.SetInsertPoint(VectorPH->getTerminator());
1943     auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
1944     auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
1945     VectorInit = Builder.CreateInsertElement(
1946         PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
1947   }
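       // E.g. for VF=4 this yields <poison, poison, poison, %init>, placing the
       // initial scalar value in the last lane of the recurrence vector.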
1948 
1949   // Create a phi node for the new recurrence.
1950   PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
1951   EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1952   EntryPart->addIncoming(VectorInit, VectorPH);
1953   State.set(this, EntryPart, 0);
1954 }
1955 
1956 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1957 void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
1958                                             VPSlotTracker &SlotTracker) const {
1959   O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
1960   printAsOperand(O, SlotTracker);
1961   O << " = phi ";
1962   printOperands(O, SlotTracker);
1963 }
1964 #endif
1965 
1966 void VPReductionPHIRecipe::execute(VPTransformState &State) {
1967   auto &Builder = State.Builder;
1968 
1969   // Reductions do not have to start at zero. They can start with
1970   // any loop-invariant value.
1971   VPValue *StartVPV = getStartValue();
1972   Value *StartV = StartVPV->getLiveInIRValue();
1973 
1974   // In order to support recurrences we need to be able to vectorize Phi nodes.
1975   // Phi nodes have cycles, so we need to vectorize them in two stages. This is
1976   // stage #1: We create a new vector PHI node with no incoming edges. We'll use
1977   // this value when we vectorize all of the instructions that use the PHI.
1978   bool ScalarPHI = State.VF.isScalar() || IsInLoop;
1979   Type *VecTy = ScalarPHI ? StartV->getType()
1980                           : VectorType::get(StartV->getType(), State.VF);
1981 
1982   BasicBlock *HeaderBB = State.CFG.PrevBB;
1983   assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
1984          "recipe must be in the vector loop header");
1985   unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
1986   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1987     Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
1988     EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
1989     State.set(this, EntryPart, Part, IsInLoop);
1990   }
1991 
1992   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1993 
1994   Value *Iden = nullptr;
1995   RecurKind RK = RdxDesc.getRecurrenceKind();
1996   if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
1997       RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
1998     // MinMax and AnyOf reductions have the start value as their identity.
1999     if (ScalarPHI) {
2000       Iden = StartV;
2001     } else {
2002       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
2003       Builder.SetInsertPoint(VectorPH->getTerminator());
2004       StartV = Iden =
2005           Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
2006     }
2007   } else {
2008     Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
2009                                          RdxDesc.getFastMathFlags());
2010 
2011     if (!ScalarPHI) {
2012       Iden = Builder.CreateVectorSplat(State.VF, Iden);
2013       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
2014       Builder.SetInsertPoint(VectorPH->getTerminator());
2015       Constant *Zero = Builder.getInt32(0);
2016       StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
2017     }
2018   }
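       // Worked example (integer add reduction, VF=4, start value 10): Iden is
       // <0,0,0,0> and StartV becomes <10,0,0,0>; part 0 starts from StartV and
       // the remaining parts (if any) start from Iden.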
2019 
2020   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
2021     Value *EntryPart = State.get(this, Part, IsInLoop);
2022     // Make sure to add the reduction start value only to the
2023     // first unroll part.
2024     Value *StartVal = (Part == 0) ? StartV : Iden;
2025     cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
2026   }
2027 }
2028 
2029 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2030 void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
2031                                  VPSlotTracker &SlotTracker) const {
2032   O << Indent << "WIDEN-REDUCTION-PHI ";
2033 
2034   printAsOperand(O, SlotTracker);
2035   O << " = phi ";
2036   printOperands(O, SlotTracker);
2037 }
2038 #endif
2039 
2040 void VPWidenPHIRecipe::execute(VPTransformState &State) {
2041   assert(EnableVPlanNativePath &&
2042          "Non-native vplans are not expected to have VPWidenPHIRecipes.");
2043 
2044   Value *Op0 = State.get(getOperand(0), 0);
2045   Type *VecTy = Op0->getType();
2046   Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
2047   State.set(this, VecPhi, 0);
2048 }
2049 
2050 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2051 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
2052                              VPSlotTracker &SlotTracker) const {
2053   O << Indent << "WIDEN-PHI ";
2054 
2055   auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
2056   // Unless all incoming values are modeled in VPlan, print the original PHI
2057   // directly.
2058   // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
2059   // values as VPValues.
2060   if (getNumOperands() != OriginalPhi->getNumOperands()) {
2061     O << VPlanIngredient(OriginalPhi);
2062     return;
2063   }
2064 
2065   printAsOperand(O, SlotTracker);
2066   O << " = phi ";
2067   printOperands(O, SlotTracker);
2068 }
2069 #endif
2070 
2071 // TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2072 // remove VPActiveLaneMaskPHIRecipe.
2073 void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
2074   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2075   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
2076     Value *StartMask = State.get(getOperand(0), Part);
2077     PHINode *EntryPart =
2078         State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
2079     EntryPart->addIncoming(StartMask, VectorPH);
2080     EntryPart->setDebugLoc(getDebugLoc());
2081     State.set(this, EntryPart, Part);
2082   }
2083 }
2084 
2085 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2086 void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
2087                                       VPSlotTracker &SlotTracker) const {
2088   O << Indent << "ACTIVE-LANE-MASK-PHI ";
2089 
2090   printAsOperand(O, SlotTracker);
2091   O << " = phi ";
2092   printOperands(O, SlotTracker);
2093 }
2094 #endif
2095 
2096 void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) {
2097   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2098   assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization.");
2099   Value *Start = State.get(getOperand(0), VPIteration(0, 0));
2100   PHINode *EntryPart =
2101       State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
2102   EntryPart->addIncoming(Start, VectorPH);
2103   EntryPart->setDebugLoc(getDebugLoc());
2104   State.set(this, EntryPart, 0, /*IsScalar=*/true);
2105 }
2106 
2107 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2108 void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
2109                                   VPSlotTracker &SlotTracker) const {
2110   O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
2111 
2112   printAsOperand(O, SlotTracker);
2113   O << " = phi ";
2114   printOperands(O, SlotTracker);
2115 }
2116 #endif
2117