//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementations for different VPlan recipes.
///
//===----------------------------------------------------------------------===//

14 #include "VPlan.h"
15 #include "VPlanAnalysis.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/Analysis/IVDescriptors.h"
20 #include "llvm/IR/BasicBlock.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/Instruction.h"
23 #include "llvm/IR/Instructions.h"
24 #include "llvm/IR/Type.h"
25 #include "llvm/IR/Value.h"
26 #include "llvm/Support/Casting.h"
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
31 #include "llvm/Transforms/Utils/LoopUtils.h"
32 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
33 #include <cassert>
34 
35 using namespace llvm;
36 
37 using VectorParts = SmallVector<Value *, 2>;
38 
39 namespace llvm {
40 extern cl::opt<bool> EnableVPlanNativePath;
41 }
42 
43 #define LV_NAME "loop-vectorize"
44 #define DEBUG_TYPE LV_NAME
45 
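// Note: the memory-effect queries below are conservative; recipe kinds not
// handled explicitly fall through to the default case and are assumed to
// read/write memory.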
bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPInterleaveSC:
    return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return true;
  case VPReplicateSC:
  case VPWidenCallSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPBranchOnMaskSC:
  case VPScalarIVStepsSC:
  case VPPredInstPHISC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
    return true;
  case VPReplicateSC:
  case VPWidenCallSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPBranchOnMaskSC:
  case VPPredInstPHISC:
  case VPScalarIVStepsSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPDerivedIVSC:
  case VPPredInstPHISC:
  case VPScalarCastSC:
    return false;
  case VPInstructionSC:
    switch (cast<VPInstruction>(this)->getOpcode()) {
    case Instruction::Or:
    case Instruction::ICmp:
    case Instruction::Select:
    case VPInstruction::Not:
    case VPInstruction::CalculateTripCountMinusVF:
    case VPInstruction::CanonicalIVIncrementForPart:
    case VPInstruction::PtrAdd:
      return false;
    default:
      return true;
    }
  case VPWidenCallSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayHaveSideEffects();
  case VPBlendSC:
  case VPReductionSC:
  case VPScalarIVStepsSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenPointerInductionSC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPInterleaveSC:
    return mayWriteToMemory();
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    assert(
        cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
            mayWriteToMemory() &&
        "mayHaveSideEffects result for ingredient differs from this "
        "implementation");
    return mayWriteToMemory();
  case VPReplicateSC: {
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    return true;
  }
}

void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
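  // The live-out is fed from the middle block. It uses the last lane of the
  // last unrolled part, unless the exit value is uniform after vectorization,
  // in which case the first lane suffices.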
  auto Lane = VPLane::getLastLaneForVF(State.VF);
  VPValue *ExitValue = getOperand(0);
  if (vputils::isUniformAfterVectorization(ExitValue))
    Lane = VPLane::getFirstLane();
  VPBasicBlock *MiddleVPBB =
      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
  assert(MiddleVPBB->getNumSuccessors() == 0 &&
         "the middle block must not have any successors");
  BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
  Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
                   MiddleBB);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
  O << "Live-out ";
  getPhi()->printAsOperand(O);
  O << " = ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << "\n";
}
#endif

void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, InsertPos->getIterator());
}

void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);
  BB.insert(this, I);
}

void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
}

void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
}

iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}

void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}

void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}

FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
  assert(OpType == OperationType::FPMathOp &&
         "recipe doesn't have fast math flags");
  FastMathFlags Res;
  Res.setAllowReassoc(FMFs.AllowReassoc);
  Res.setNoNaNs(FMFs.NoNaNs);
  Res.setNoInfs(FMFs.NoInfs);
  Res.setNoSignedZeros(FMFs.NoSignedZeros);
  Res.setAllowReciprocal(FMFs.AllowReciprocal);
  Res.setAllowContract(FMFs.AllowContract);
  Res.setApproxFunc(FMFs.ApproxFunc);
  return Res;
}

VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
                             VPValue *A, VPValue *B, DebugLoc DL,
                             const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
                          Pred, DL),
      Opcode(Opcode), Name(Name.str()) {
  assert(Opcode == Instruction::ICmp &&
         "only ICmp predicates supported at the moment");
}

VPInstruction::VPInstruction(unsigned Opcode,
                             std::initializer_list<VPValue *> Operands,
                             FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
      Opcode(Opcode), Name(Name.str()) {
  // Make sure the VPInstruction is a floating-point operation.
  assert(isFPMathOp() && "this op can't take fast-math flags");
}

bool VPInstruction::doesGeneratePerAllLanes() const {
  return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
}

bool VPInstruction::canGenerateScalarForFirstLane() const {
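  // Binary ops and the opcodes below can produce a single scalar for the
  // first lane when only that lane is demanded.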
  if (Instruction::isBinaryOp(getOpcode()))
    return true;

  switch (Opcode) {
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::ComputeReductionResult:
  case VPInstruction::PtrAdd:
  case VPInstruction::ExplicitVectorLength:
    return true;
  default:
    return false;
  }
}

Value *VPInstruction::generatePerLane(VPTransformState &State,
                                      const VPIteration &Lane) {
  IRBuilderBase &Builder = State.Builder;

  assert(getOpcode() == VPInstruction::PtrAdd &&
         "only PtrAdd opcodes are supported for now");
  return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
                              State.get(getOperand(1), Lane), Name);
}

Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
  IRBuilderBase &Builder = State.Builder;

  if (Instruction::isBinaryOp(getOpcode())) {
    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
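    // If only the first part is used, all parts alias part 0, so reuse the
    // value generated for it.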
318     if (Part != 0 && vputils::onlyFirstPartUsed(this))
319       return State.get(this, 0, OnlyFirstLaneUsed);
320 
321     Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
322     Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
323     auto *Res =
324         Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
325     if (auto *I = dyn_cast<Instruction>(Res))
326       setFlags(I);
327     return Res;
328   }
329 
330   switch (getOpcode()) {
331   case VPInstruction::Not: {
332     Value *A = State.get(getOperand(0), Part);
333     return Builder.CreateNot(A, Name);
334   }
335   case Instruction::ICmp: {
336     Value *A = State.get(getOperand(0), Part);
337     Value *B = State.get(getOperand(1), Part);
338     return Builder.CreateCmp(getPredicate(), A, B, Name);
339   }
340   case Instruction::Select: {
341     Value *Cond = State.get(getOperand(0), Part);
342     Value *Op1 = State.get(getOperand(1), Part);
343     Value *Op2 = State.get(getOperand(2), Part);
344     return Builder.CreateSelect(Cond, Op1, Op2, Name);
345   }
346   case VPInstruction::ActiveLaneMask: {
347     // Get first lane of vector induction variable.
348     Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
349     // Get the original loop tripcount.
350     Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));
351 
352     // If this part of the active lane mask is scalar, generate the CMP directly
353     // to avoid unnecessary extracts.
354     if (State.VF.isScalar())
355       return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
356                                Name);
357 
358     auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
359     auto *PredTy = VectorType::get(Int1Ty, State.VF);
360     return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
361                                    {PredTy, ScalarTC->getType()},
362                                    {VIVElem0, ScalarTC}, nullptr, Name);
363   }
364   case VPInstruction::FirstOrderRecurrenceSplice: {
365     // Generate code to combine the previous and current values in vector v3.
366     //
367     //   vector.ph:
368     //     v_init = vector(..., ..., ..., a[-1])
369     //     br vector.body
370     //
371     //   vector.body
372     //     i = phi [0, vector.ph], [i+4, vector.body]
373     //     v1 = phi [v_init, vector.ph], [v2, vector.body]
374     //     v2 = a[i, i+1, i+2, i+3];
375     //     v3 = vector(v1(3), v2(0, 1, 2))
376 
377     // For the first part, use the recurrence phi (v1), otherwise v2.
378     auto *V1 = State.get(getOperand(0), 0);
379     Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
380     if (!PartMinus1->getType()->isVectorTy())
381       return PartMinus1;
382     Value *V2 = State.get(getOperand(1), Part);
383     return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
384   }
385   case VPInstruction::CalculateTripCountMinusVF: {
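    // Computes max(TC - Step, 0) as TC > Step ? TC - Step : 0, where Step is
    // one full vector iteration, i.e. VF * UF elements.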
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    Value *ScalarTC = State.get(getOperand(0), {0, 0});
    Value *Step =
        createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
    Value *Sub = Builder.CreateSub(ScalarTC, Step);
    Value *Cmp =
        Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
    Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
    return Builder.CreateSelect(Cmp, Sub, Zero);
  }
  case VPInstruction::ExplicitVectorLength: {
    // Compute EVL
    auto GetEVL = [=](VPTransformState &State, Value *AVL) {
      assert(AVL->getType()->isIntegerTy() &&
             "Requested vector length should be an integer.");

      // TODO: Add support for MaxSafeDist for correct loop emission.
      assert(State.VF.isScalable() && "Expected scalable vector factor.");
      Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());

      Value *EVL = State.Builder.CreateIntrinsic(
          State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
          {AVL, VFArg, State.Builder.getTrue()});
      return EVL;
    };
    // TODO: Restructure this code with an explicit remainder loop, vsetvli can
    // be outside of the main loop.
    assert(Part == 0 && "No unrolling expected for predicated vectorization.");
    // Compute VTC - IV as the AVL (requested vector length).
    Value *Index = State.get(getOperand(0), VPIteration(0, 0));
    Value *TripCount = State.get(getOperand(1), VPIteration(0, 0));
    Value *AVL = State.Builder.CreateSub(TripCount, Index);
    Value *EVL = GetEVL(State, AVL);
    return EVL;
  }
  case VPInstruction::CanonicalIVIncrementForPart: {
    auto *IV = State.get(getOperand(0), VPIteration(0, 0));
    if (Part == 0)
      return IV;

    // The canonical IV is incremented by the vectorization factor (num of SIMD
    // elements) times the unroll part.
    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
                             hasNoSignedWrap());
  }
  case VPInstruction::BranchOnCond: {
    if (Part != 0)
      return nullptr;

    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination for exiting blocks now and
    // to forward destination(s) later when they are created.
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);

    if (getParent()->isExiting())
      CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);

    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
  case VPInstruction::BranchOnCount: {
    if (Part != 0)
      return nullptr;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
    Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination (the header) now and to the
    // forward destination (the exit/middle block) later when it is created.
    // Note that CreateCondBr expects a valid BB as first argument, so we need
    // to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
  case VPInstruction::ComputeReductionResult: {
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
    // and will be removed by breaking up the recipe further.
    auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
    auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
    // Get its reduction variable descriptor.
    const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();

    RecurKind RK = RdxDesc.getRecurrenceKind();

    VPValue *LoopExitingDef = getOperand(1);
    Type *PhiTy = OrigPhi->getType();
    VectorParts RdxParts(State.UF);
    for (unsigned Part = 0; Part < State.UF; ++Part)
      RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());

    // If the vector reduction can be performed in a smaller type, we truncate
    // then extend the loop exit value to enable InstCombine to evaluate the
    // entire expression in the smaller type.
    // TODO: Handle this in truncateToMinBW.
    if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
      Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
      for (unsigned Part = 0; Part < State.UF; ++Part)
        RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
    }
    // Reduce all of the unrolled parts into a single vector.
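    // E.g. for UF = 2 and an integer add reduction, the two parts are
    // combined with a single "%bin.rdx = add %rdx.part1, %rdx.part0" before
    // any final horizontal reduction.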
    Value *ReducedPartRdx = RdxParts[0];
    unsigned Op = RecurrenceDescriptor::getOpcode(RK);

    if (PhiR->isOrdered()) {
      ReducedPartRdx = RdxParts[State.UF - 1];
    } else {
      // Floating-point operations should have some FMF to enable the reduction.
      IRBuilderBase::FastMathFlagGuard FMFG(Builder);
      Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
      for (unsigned Part = 1; Part < State.UF; ++Part) {
        Value *RdxPart = RdxParts[Part];
        if (Op != Instruction::ICmp && Op != Instruction::FCmp)
          ReducedPartRdx = Builder.CreateBinOp(
              (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
        else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
          TrackingVH<Value> ReductionStartValue =
              RdxDesc.getRecurrenceStartValue();
          ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK,
                                         ReducedPartRdx, RdxPart);
        } else
          ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
      }
    }

    // Create the reduction after the loop. Note that inloop reductions create
    // the target reduction in the loop using a Reduction recipe.
    if (State.VF.isVector() && !PhiR->isInLoop()) {
      ReducedPartRdx =
          createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
      // If the reduction can be performed in a smaller type, we need to extend
      // the reduction to the wider type before we branch to the original loop.
      if (PhiTy != RdxDesc.getRecurrenceType())
        ReducedPartRdx = RdxDesc.isSigned()
                             ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
                             : Builder.CreateZExt(ReducedPartRdx, PhiTy);
    }

    // If there were stores of the reduction value to a uniform memory address
    // inside the loop, create the final store here.
    if (StoreInst *SI = RdxDesc.IntermediateStore) {
      auto *NewSI = Builder.CreateAlignedStore(
          ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
      propagateMetadata(NewSI, SI);
    }

    return ReducedPartRdx;
  }
  case VPInstruction::PtrAdd: {
    assert(vputils::onlyFirstLaneUsed(this) &&
           "can only generate first lane for PtrAdd");
    Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
    Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
    return Builder.CreatePtrAdd(Ptr, Addend, Name);
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}

#if !defined(NDEBUG)
bool VPInstruction::isFPMathOp() const {
  // Inspired by FPMathOperator::classof. Notable differences are that we don't
  // support Call and PHI opcodes here yet.
  return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
         Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
         Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
         Opcode == Instruction::FCmp || Opcode == Instruction::Select;
}
#endif

void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Instance && "VPInstruction executing an Instance");
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  assert((hasFastMathFlags() == isFPMathOp() ||
          getOpcode() == Instruction::Select) &&
         "Recipe not a FPMathOp but has fast-math flags?");
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());
  State.setDebugLocFrom(getDebugLoc());
  bool GeneratesPerFirstLaneOnly =
      canGenerateScalarForFirstLane() &&
      (vputils::onlyFirstLaneUsed(this) ||
       getOpcode() == VPInstruction::ComputeReductionResult);
  bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    if (GeneratesPerAllLanes) {
      for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
           Lane != NumLanes; ++Lane) {
        Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
        assert(GeneratedValue && "generatePerLane must produce a value");
        State.set(this, GeneratedValue, VPIteration(Part, Lane));
      }
      continue;
    }

    Value *GeneratedValue = generatePerPart(State, Part);
    if (!hasResult())
      continue;
    assert(GeneratedValue && "generatePerPart must produce a value");
    assert((GeneratedValue->getType()->isVectorTy() ==
                !GeneratesPerFirstLaneOnly ||
            State.VF.isScalar()) &&
           "scalar value but not only first lane defined");
    State.set(this, GeneratedValue, Part,
              /*IsScalar*/ GeneratesPerFirstLaneOnly);
  }
}

bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  if (Instruction::isBinaryOp(getOpcode()))
    return vputils::onlyFirstLaneUsed(this);

  switch (getOpcode()) {
  default:
    return false;
  case Instruction::ICmp:
  case VPInstruction::PtrAdd:
    // TODO: Cover additional opcodes.
    return vputils::onlyFirstLaneUsed(this);
  case VPInstruction::ActiveLaneMask:
  case VPInstruction::ExplicitVectorLength:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::BranchOnCount:
    return true;
  }
  llvm_unreachable("switch should return");
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
  print(dbgs(), "", SlotTracker);
}

void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::ExplicitVectorLength:
    O << "EXPLICIT-VECTOR-LENGTH";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::CalculateTripCountMinusVF:
    O << "TC > VF ? TC - VF : 0";
    break;
  case VPInstruction::CanonicalIVIncrementForPart:
    O << "VF * Part +";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count";
    break;
  case VPInstruction::ComputeReductionResult:
    O << "compute-reduction-result";
    break;
  case VPInstruction::PtrAdd:
    O << "ptradd";
    break;
  default:
    O << Instruction::getOpcodeName(getOpcode());
  }

  printFlags(O);
  printOperands(O, SlotTracker);

  if (auto DL = getDebugLoc()) {
    O << ", !dbg ";
    DL.print(O);
  }
}
#endif

void VPWidenCallRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  auto &CI = *cast<CallInst>(getUnderlyingInstr());
  assert(!isa<DbgInfoIntrinsic>(CI) &&
         "DbgInfoIntrinsic should have been dropped during VPlan construction");
  State.setDebugLocFrom(getDebugLoc());

  bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
  FunctionType *VFTy = nullptr;
  if (Variant)
    VFTy = Variant->getFunctionType();
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    SmallVector<Type *, 2> TysForDecl;
    // Add return type if intrinsic is overloaded on it.
    if (UseIntrinsic &&
        isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
      TysForDecl.push_back(
          VectorType::get(CI.getType()->getScalarType(), State.VF));
    SmallVector<Value *, 4> Args;
    for (const auto &I : enumerate(operands())) {
      // Some intrinsics have a scalar argument - don't replace it with a
      // vector.
      Value *Arg;
      if (UseIntrinsic &&
          isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
        Arg = State.get(I.value(), VPIteration(0, 0));
      // Some vectorized function variants may also take a scalar argument,
      // e.g. linear parameters for pointers. This needs to be the scalar value
      // from the start of the respective part when interleaving.
      else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
        Arg = State.get(I.value(), VPIteration(Part, 0));
      else
        Arg = State.get(I.value(), Part);
      if (UseIntrinsic &&
          isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
        TysForDecl.push_back(Arg->getType());
      Args.push_back(Arg);
    }

    Function *VectorF;
    if (UseIntrinsic) {
      // Use vector version of the intrinsic.
      Module *M = State.Builder.GetInsertBlock()->getModule();
      VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
      assert(VectorF && "Can't retrieve vector intrinsic.");
    } else {
      assert(Variant != nullptr && "Can't create vector function.");
      VectorF = Variant;
    }

    SmallVector<OperandBundleDef, 1> OpBundles;
    CI.getOperandBundlesAsDefs(OpBundles);
    CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);

    if (isa<FPMathOperator>(V))
      V->copyFastMathFlags(&CI);

    if (!V->getType()->isVoidTy())
      State.set(this, V, Part);
    State.addMetadata(V, &CI);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  auto *CI = cast<CallInst>(getUnderlyingInstr());
  if (CI->getType()->isVoidTy())
    O << "void ";
  else {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  O << "call @" << CI->getCalledFunction()->getName() << "(";
  printOperands(O, SlotTracker);
  O << ")";

  if (VectorIntrinsicID)
    O << " (using vector intrinsic)";
  else {
    O << " (using library function";
    if (Variant->hasName())
      O << ": " << Variant->getName();
    O << ")";
  }
}

void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-SELECT ";
  printAsOperand(O, SlotTracker);
  O << " = select ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(2)->printAsOperand(O, SlotTracker);
  O << (isInvariantCond() ? " (condition is loop invariant)" : "");
}
#endif

void VPWidenSelectRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());

  // The condition can be loop invariant but still defined inside the
  // loop. This means that we can't just use the original 'cond' value.
  // We have to take the 'vectorized' value and pick the first lane.
  // Instcombine will make this a no-op.
  auto *InvarCond =
      isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
    Value *Op0 = State.get(getOperand(1), Part);
    Value *Op1 = State.get(getOperand(2), Part);
    Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
    State.set(this, Sel, Part);
    State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
  }
}

VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
    const FastMathFlags &FMF) {
  AllowReassoc = FMF.allowReassoc();
  NoNaNs = FMF.noNaNs();
  NoInfs = FMF.noInfs();
  NoSignedZeros = FMF.noSignedZeros();
  AllowReciprocal = FMF.allowReciprocal();
  AllowContract = FMF.allowContract();
  ApproxFunc = FMF.approxFunc();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
  switch (OpType) {
  case OperationType::Cmp:
    O << " " << CmpInst::getPredicateName(getPredicate());
    break;
  case OperationType::DisjointOp:
    if (DisjointFlags.IsDisjoint)
      O << " disjoint";
    break;
  case OperationType::PossiblyExactOp:
    if (ExactFlags.IsExact)
      O << " exact";
    break;
  case OperationType::OverflowingBinOp:
    if (WrapFlags.HasNUW)
      O << " nuw";
    if (WrapFlags.HasNSW)
      O << " nsw";
    break;
  case OperationType::FPMathOp:
    getFastMathFlags().print(O);
    break;
  case OperationType::GEPOp:
    if (GEPFlags.IsInBounds)
      O << " inbounds";
    break;
  case OperationType::NonNegOp:
    if (NonNegFlags.NonNeg)
      O << " nneg";
    break;
  case OperationType::Other:
    break;
  }
  if (getNumOperands() > 0)
    O << " ";
}
#endif

void VPWidenRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  switch (Opcode) {
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Just widen unops and binops.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      SmallVector<Value *, 2> Ops;
      for (VPValue *VPOp : operands())
        Ops.push_back(State.get(VPOp, Part));

      Value *V = Builder.CreateNAryOp(Opcode, Ops);

      if (auto *VecOp = dyn_cast<Instruction>(V))
        setFlags(VecOp);

      // Use this vector value for all users of the original instruction.
      State.set(this, V, Part);
      State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  case Instruction::Freeze: {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *Op = State.get(getOperand(0), Part);

      Value *Freeze = Builder.CreateFreeze(Op);
      State.set(this, Freeze, Part);
    }
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Widen compares. Generate vector compares.
    bool FCmp = Opcode == Instruction::FCmp;
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *B = State.get(getOperand(1), Part);
      Value *C = nullptr;
      if (FCmp) {
        // Propagate fast math flags.
        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
        if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
          Builder.setFastMathFlags(I->getFastMathFlags());
        C = Builder.CreateFCmp(getPredicate(), A, B);
      } else {
        C = Builder.CreateICmp(getPredicate(), A, B);
      }
      State.set(this, C, Part);
      State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  default:
    // This instruction is not vectorized by simple widening.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
                      << Instruction::getOpcodeName(Opcode));
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.

#if !defined(NDEBUG)
  // Verify that VPlan type inference results agree with the type of the
  // generated values.
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    assert(VectorType::get(State.TypeAnalysis.inferScalarType(this),
                           State.VF) == State.get(this, Part)->getType() &&
           "inferred type and type from generated instructions do not match");
  }
#endif
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = " << Instruction::getOpcodeName(Opcode);
  printFlags(O);
  printOperands(O, SlotTracker);
}
#endif

void VPWidenCastRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  // Vectorize casts.
  assert(State.VF.isVector() && "Not vectorizing?");
  Type *DestTy = VectorType::get(getResultType(), State.VF);
  VPValue *Op = getOperand(0);
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    if (Part > 0 && Op->isLiveIn()) {
      // FIXME: Remove once explicit unrolling is implemented using VPlan.
      State.set(this, State.get(this, 0), Part);
      continue;
    }
    Value *A = State.get(Op, Part);
    Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
    State.set(this, Cast, Part);
    State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CAST ";
  printAsOperand(O, SlotTracker);
  O << " = " << Instruction::getOpcodeName(Opcode) << " ";
  printFlags(O);
  printOperands(O, SlotTracker);
  O << " to " << *getResultType();
}
#endif

/// This function adds
/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
/// to each vector element of \p Val. The sequence starts at \p StartIdx.
/// \p BinOp is relevant for FP induction variables.
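/// For example, for Val = <a, a, a, a>, StartIdx = 0 and Step = s, this
/// returns <a, a + s, a + 2 * s, a + 3 * s>.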
static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
                            Instruction::BinaryOps BinOp, ElementCount VF,
                            IRBuilderBase &Builder) {
  assert(VF.isVector() && "only vector VFs are supported");

  // Create and check the types.
  auto *ValVTy = cast<VectorType>(Val->getType());
  ElementCount VLen = ValVTy->getElementCount();

  Type *STy = Val->getType()->getScalarType();
  assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
         "Induction Step must be an integer or FP");
  assert(Step->getType() == STy && "Step has wrong type");

  SmallVector<Constant *, 8> Indices;

  // Create a vector of consecutive numbers from zero to VF.
  VectorType *InitVecValVTy = ValVTy;
  if (STy->isFloatingPointTy()) {
    Type *InitVecValSTy =
        IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
    InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
  }
  Value *InitVec = Builder.CreateStepVector(InitVecValVTy);

  // Splat the StartIdx
  Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);

  if (STy->isIntegerTy()) {
    InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
    Step = Builder.CreateVectorSplat(VLen, Step);
    assert(Step->getType() == Val->getType() && "Invalid step vec");
    // FIXME: The newly created binary instructions should contain nsw/nuw
    // flags, which can be found from the original scalar operations.
    Step = Builder.CreateMul(InitVec, Step);
    return Builder.CreateAdd(Val, Step, "induction");
  }

  // Floating point induction.
  assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
         "Binary Opcode should be specified for FP induction");
  InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
  InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);

  Step = Builder.CreateVectorSplat(VLen, Step);
  Value *MulOp = Builder.CreateFMul(InitVec, Step);
  return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
}

/// A helper function that returns an integer or floating-point constant with
/// value C.
static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
  return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
                           : ConstantFP::get(Ty, C);
}

static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,
                                  ElementCount VF) {
  assert(FTy->isFloatingPointTy() && "Expected floating point type!");
  Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
  Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
  return B.CreateUIToFP(RuntimeVF, FTy);
}

void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Int or FP induction being replicated.");

  Value *Start = getStartValue()->getLiveInIRValue();
  const InductionDescriptor &ID = getInductionDescriptor();
  TruncInst *Trunc = getTruncInst();
  IRBuilderBase &Builder = State.Builder;
  assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
  assert(State.VF.isVector() && "must have vector VF");

  // The value from the original loop to which we are mapping the new induction
  // variable.
  Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;

  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
    Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());

  // Now do the actual transformations, and start with fetching the step value.
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));

  assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
         "Expected either an induction phi-node or a truncate of it!");

  // Construct the initial value of the vector IV in the vector loop preheader
  auto CurrIP = Builder.saveIP();
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  Builder.SetInsertPoint(VectorPH->getTerminator());
  if (isa<TruncInst>(EntryVal)) {
    assert(Start->getType()->isIntegerTy() &&
           "Truncation requires an integer type");
    auto *TruncType = cast<IntegerType>(EntryVal->getType());
    Step = Builder.CreateTrunc(Step, TruncType);
    Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
  }

  Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
  Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
  Value *SteppedStart = getStepVector(
      SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);

  // We create vector phi nodes for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (Step->getType()->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = ID.getInductionOpcode();
    MulOp = Instruction::FMul;
  }

  // Multiply the vectorization factor by the step using integer or
  // floating-point arithmetic as appropriate.
  Type *StepType = Step->getType();
  Value *RuntimeVF;
  if (Step->getType()->isFloatingPointTy())
    RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
  else
    RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
  Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);

  // Create a vector splat to use in the induction update.
  //
  // FIXME: If the step is non-constant, we create the vector splat with
  //        IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
  //        handle a constant vector splat.
  Value *SplatVF = isa<Constant>(Mul)
                       ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
                       : Builder.CreateVectorSplat(State.VF, Mul);
  Builder.restoreIP(CurrIP);

  // We may need to add the step a number of times, depending on the unroll
  // factor. The last of those goes into the PHI.
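  // E.g. for UF = 2, the generated chain is roughly:
  //   vec.ind      = phi [ SteppedStart, vector.ph ], [ vec.ind.next, ... ]
  //   step.add     = vec.ind + SplatVF   ; serves part 1
  //   vec.ind.next = step.add + SplatVF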
  PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
  VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  VecInd->setDebugLoc(EntryVal->getDebugLoc());
  Instruction *LastInduction = VecInd;
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    State.set(this, LastInduction, Part);

    if (isa<TruncInst>(EntryVal))
      State.addMetadata(LastInduction, EntryVal);

    LastInduction = cast<Instruction>(
        Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
    LastInduction->setDebugLoc(EntryVal->getDebugLoc());
  }

  LastInduction->setName("vec.ind.next");
  VecInd->addIncoming(SteppedStart, VectorPH);
  // Add induction update using an incorrect block temporarily. The phi node
  // will be fixed after VPlan execution. Note that at this point the latch
  // block cannot be used, as it does not exist yet.
  // TODO: Model increment value in VPlan, by turning the recipe into a
  // multi-def and a subclass of VPHeaderPHIRecipe.
  VecInd->addIncoming(LastInduction, VectorPH);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-INDUCTION";
  if (getTruncInst()) {
    O << "\\l\"";
    O << " +\n" << Indent << "\"  " << VPlanIngredient(IV) << "\\l\"";
    O << " +\n" << Indent << "\"  ";
    getVPValue(0)->printAsOperand(O, SlotTracker);
  } else
    O << " " << VPlanIngredient(IV);

  O << ", ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
#endif

bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
  // The step may be defined by a recipe in the preheader (e.g. if it requires
  // SCEV expansion), but for the canonical induction the step is required to be
  // 1, which is represented as live-in.
  if (getStepValue()->getDefiningRecipe())
    return false;
  auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
  auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
  return StartC && StartC->isZero() && StepC && StepC->isOne();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = DERIVED-IV ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << " + ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << " * ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
#endif

void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());

  // Compute the scalar induction steps: the first operand provides the scalar
  // induction variable on which to base the steps, and getStepValue() gives
  // the size of each step.
  Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));
  IRBuilderBase &Builder = State.Builder;

  // Ensure step has the same type as that of scalar IV.
  Type *BaseIVTy = BaseIV->getType()->getScalarType();
  assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");

  // We build scalar steps for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (BaseIVTy->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = InductionOpcode;
    MulOp = Instruction::FMul;
  }

  // Determine the number of scalars we need to generate for each unroll
  // iteration.
  bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
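  // Lane L of unroll part P receives BaseIV + (P * VF + L) * Step.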
1262   // Compute the scalar steps and save the results in State.
1263   Type *IntStepTy =
1264       IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
1265   Type *VecIVTy = nullptr;
1266   Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
1267   if (!FirstLaneOnly && State.VF.isScalable()) {
1268     VecIVTy = VectorType::get(BaseIVTy, State.VF);
1269     UnitStepVec =
1270         Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
1271     SplatStep = Builder.CreateVectorSplat(State.VF, Step);
1272     SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
1273   }
1274 
1275   unsigned StartPart = 0;
1276   unsigned EndPart = State.UF;
1277   unsigned StartLane = 0;
1278   unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1279   if (State.Instance) {
1280     StartPart = State.Instance->Part;
1281     EndPart = StartPart + 1;
1282     StartLane = State.Instance->Lane.getKnownLane();
1283     EndLane = StartLane + 1;
1284   }
1285   for (unsigned Part = StartPart; Part < EndPart; ++Part) {
1286     Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
1287 
1288     if (!FirstLaneOnly && State.VF.isScalable()) {
1289       auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
1290       auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
1291       if (BaseIVTy->isFloatingPointTy())
1292         InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
1293       auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
1294       auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
1295       State.set(this, Add, Part);
1296       // It's useful to record the lane values too for the known minimum number
1297       // of elements so we do those below. This improves the code quality when
1298       // trying to extract the first element, for example.
1299     }
1300 
1301     if (BaseIVTy->isFloatingPointTy())
1302       StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
1303 
1304     for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
1305       Value *StartIdx = Builder.CreateBinOp(
1306           AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
1307       // The step returned by `createStepForVF` is a runtime-evaluated value
1308       // when VF is scalable. Otherwise, it should be folded into a Constant.
1309       assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
1310              "Expected StartIdx to be folded to a constant when VF is not "
1311              "scalable");
1312       auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
1313       auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1314       State.set(this, Add, VPIteration(Part, Lane));
1315     }
1316   }
1317 }
1318 
1319 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1320 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
1321                                   VPSlotTracker &SlotTracker) const {
1322   O << Indent;
1323   printAsOperand(O, SlotTracker);
1324   O << " = SCALAR-STEPS ";
1325   printOperands(O, SlotTracker);
1326 }
1327 #endif
1328 
1329 void VPWidenGEPRecipe::execute(VPTransformState &State) {
1330   assert(State.VF.isVector() && "not widening");
1331   auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1332   // Construct a vector GEP by widening the operands of the scalar GEP as
1333   // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1334   // results in a vector of pointers when at least one operand of the GEP
1335   // is vector-typed. Thus, to keep the representation compact, we only use
1336   // vector-typed operands for loop-varying values.
1337 
1338   if (areAllOperandsInvariant()) {
1339     // If we are vectorizing, but the GEP has only loop-invariant operands,
1340     // the GEP we build (by only using vector-typed operands for
1341     // loop-varying values) would be a scalar pointer. Thus, to ensure we
1342     // produce a vector of pointers, we need to either arbitrarily pick an
1343     // operand to broadcast, or broadcast a clone of the original GEP.
1344     // Here, we broadcast a clone of the original.
1345     //
1346     // TODO: If at some point we decide to scalarize instructions having
1347     //       loop-invariant operands, this special case will no longer be
1348     //       required. We would add the scalarization decision to
1349     //       collectLoopScalars() and teach getVectorValue() to broadcast
1350     //       the lane-zero scalar value.
1351     SmallVector<Value *> Ops;
1352     for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1353       Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));
1354 
1355     auto *NewGEP =
1356         State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
1357                                 ArrayRef(Ops).drop_front(), "", isInBounds());
1358     for (unsigned Part = 0; Part < State.UF; ++Part) {
1359       Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1360       State.set(this, EntryPart, Part);
1361       State.addMetadata(EntryPart, GEP);
1362     }
1363   } else {
1364     // If the GEP has at least one loop-varying operand, we are sure to
1365     // produce a vector of pointers. But if we are only unrolling, we want
1366     // to produce a scalar GEP for each unroll part. Thus, the GEP we
1367     // produce with the code below will be scalar (if VF == 1) or vector
1368     // (otherwise). Note that for the unroll-only case, we still maintain
1369     // values in the vector mapping with initVector, as we do for other
1370     // instructions.
1371     for (unsigned Part = 0; Part < State.UF; ++Part) {
1372       // The pointer operand of the new GEP. If it's loop-invariant, we
1373       // won't broadcast it.
1374       auto *Ptr = isPointerLoopInvariant()
1375                       ? State.get(getOperand(0), VPIteration(0, 0))
1376                       : State.get(getOperand(0), Part);
1377 
1378       // Collect all the indices for the new GEP. If any index is
1379       // loop-invariant, we won't broadcast it.
1380       SmallVector<Value *, 4> Indices;
1381       for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1382         VPValue *Operand = getOperand(I);
1383         if (isIndexLoopInvariant(I - 1))
1384           Indices.push_back(State.get(Operand, VPIteration(0, 0)));
1385         else
1386           Indices.push_back(State.get(Operand, Part));
1387       }
1388 
1389       // Create the new GEP. Note that this GEP may be a scalar if
1390       // VF == 1; otherwise, it should be a vector.
1391       auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
1392                                              Indices, "", isInBounds());
1393       assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
1394              "NewGEP is not a pointer vector");
1395       State.set(this, NewGEP, Part);
1396       State.addMetadata(NewGEP, GEP);
1397     }
1398   }
1399 }
1400 
1401 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1402 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
1403                              VPSlotTracker &SlotTracker) const {
1404   O << Indent << "WIDEN-GEP ";
1405   O << (isPointerLoopInvariant() ? "Inv" : "Var");
1406   for (size_t I = 0; I < getNumOperands() - 1; ++I)
1407     O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
1408 
1409   O << " ";
1410   printAsOperand(O, SlotTracker);
1411   O << " = getelementptr";
1412   printFlags(O);
1413   printOperands(O, SlotTracker);
1414 }
1415 #endif
1416 
1417 void VPVectorPointerRecipe::execute(VPTransformState &State) {
1418   auto &Builder = State.Builder;
1419   State.setDebugLocFrom(getDebugLoc());
1420   for (unsigned Part = 0; Part < State.UF; ++Part) {
1421     // Calculate the pointer for the specific unroll-part.
1422     Value *PartPtr = nullptr;
1423     // Use i32 for the gep index type when the value is constant,
1424     // or query DataLayout for a more suitable index type otherwise.
1425     const DataLayout &DL =
1426         Builder.GetInsertBlock()->getModule()->getDataLayout();
1427     Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
1428                         ? DL.getIndexType(IndexedTy->getPointerTo())
1429                         : Builder.getInt32Ty();
1430     Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
1431     bool InBounds = isInBounds();
1432     if (IsReverse) {
1433       // If the address is consecutive but reversed, then the
1434       // wide store needs to start at the last vector element.
1435       // RunTimeVF = VScale * VF.getKnownMinValue()
1436       // For fixed-width vectors, VScale is 1, so RunTimeVF = VF.getKnownMinValue().
1437       Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
1438       // NumElt = -Part * RunTimeVF
1439       Value *NumElt = Builder.CreateMul(
1440           ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
1441       // LastLane = 1 - RunTimeVF
1442       Value *LastLane =
1443           Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
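           // Worked example (illustrative): for fixed-width VF=4 and Part=1,
           // RunTimeVF = 4, NumElt = -4 and LastLane = -3, so the two GEPs
           // below yield PartPtr = &Ptr[-7] and the reversed wide access
           // covers elements [-7, -4].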
1444       PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
1445       PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds);
1446     } else {
1447       Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
1448       PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
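           // E.g. (illustrative): for fixed-width VF=4 and Part=2, Increment
           // is the constant 8 and PartPtr = &Ptr[8].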
1449     }
1450 
1451     State.set(this, PartPtr, Part, /*IsScalar*/ true);
1452   }
1453 }
1454 
1455 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1456 void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
1457                                   VPSlotTracker &SlotTracker) const {
1458   O << Indent;
1459   printAsOperand(O, SlotTracker);
1460   O << " = vector-pointer ";
1461   if (IsReverse)
1462     O << "(reverse) ";
1463 
1464   printOperands(O, SlotTracker);
1465 }
1466 #endif
1467 
1468 void VPBlendRecipe::execute(VPTransformState &State) {
1469   State.setDebugLocFrom(getDebugLoc());
1470   // We know that all PHIs in non-header blocks are converted into
1471   // selects, so we don't have to worry about the insertion order and we
1472   // can just use the builder.
1473   // At this point we generate the predication tree. There may be
1474   // duplications since this is a simple recursive scan, but future
1475   // optimizations will clean it up.
1476 
1477   unsigned NumIncoming = getNumIncomingValues();
1478 
1479   // Generate a sequence of selects of the form:
1480   // SELECT(Mask3, In3,
1481   //        SELECT(Mask2, In2,
1482   //               SELECT(Mask1, In1,
1483   //                      In0)))
1484   // Note that Mask0 is never used: lanes for which no path reaches this
1485   // phi are essentially undef and are taken from In0.
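       // Illustrative example (not from the source): with two incoming values
       // this emits a single select per part, e.g.
       //   %predphi = select <4 x i1> %mask1, <4 x i32> %in1, <4 x i32> %in0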
1486   VectorParts Entry(State.UF);
1487   for (unsigned In = 0; In < NumIncoming; ++In) {
1488     for (unsigned Part = 0; Part < State.UF; ++Part) {
1489       // We might have single-edge PHIs (blocks): the first incoming value
1490       // needs no select and simply initializes the chain.
1491       Value *In0 = State.get(getIncomingValue(In), Part);
1492       if (In == 0)
1493         Entry[Part] = In0; // Initialize with the first incoming value.
1494       else {
1495         // Select between the current value and the previous incoming edge
1496         // based on the incoming mask.
1497         Value *Cond = State.get(getMask(In), Part);
1498         Entry[Part] =
1499             State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
1500       }
1501     }
1502   }
1503   for (unsigned Part = 0; Part < State.UF; ++Part)
1504     State.set(this, Entry[Part], Part);
1505 }
1506 
1507 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1508 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
1509                           VPSlotTracker &SlotTracker) const {
1510   O << Indent << "BLEND ";
1511   printAsOperand(O, SlotTracker);
1512   O << " =";
1513   if (getNumIncomingValues() == 1) {
1514     // Not a User of any mask: not really blending, this is a
1515     // single-predecessor phi.
1516     O << " ";
1517     getIncomingValue(0)->printAsOperand(O, SlotTracker);
1518   } else {
1519     for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
1520       O << " ";
1521       getIncomingValue(I)->printAsOperand(O, SlotTracker);
1522       if (I == 0)
1523         continue;
1524       O << "/";
1525       getMask(I)->printAsOperand(O, SlotTracker);
1526     }
1527   }
1528 }
1529 #endif
1530 
1531 void VPReductionRecipe::execute(VPTransformState &State) {
1532   assert(!State.Instance && "Reduction being replicated.");
1533   Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
1534   RecurKind Kind = RdxDesc.getRecurrenceKind();
1535   // Propagate the fast-math flags carried by the underlying instruction.
1536   IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
1537   State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
1538   for (unsigned Part = 0; Part < State.UF; ++Part) {
1539     Value *NewVecOp = State.get(getVecOp(), Part);
1540     if (VPValue *Cond = getCondOp()) {
1541       Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
1542       VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
1543       Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
1544       Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
1545                                                   RdxDesc.getFastMathFlags());
1546       if (State.VF.isVector()) {
1547         Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
1548       }
1549 
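           // E.g. (illustrative): for an integer add reduction the identity
           // is zero, so masked-off lanes contribute nothing:
           //   %sel = select <4 x i1> %cond, <4 x i32> %vec, <4 x i32> zeroinitializer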
1550       Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
1551       NewVecOp = Select;
1552     }
1553     Value *NewRed;
1554     Value *NextInChain;
1555     if (IsOrdered) {
1556       if (State.VF.isVector())
1557         NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
1558                                         PrevInChain);
1559       else
1560         NewRed = State.Builder.CreateBinOp(
1561             (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
1562             NewVecOp);
1563       PrevInChain = NewRed;
1564     } else {
1565       PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
1566       NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
1567     }
1568     if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
1569       NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
1570                                    NewRed, PrevInChain);
1571     } else if (IsOrdered)
1572       NextInChain = NewRed;
1573     else
1574       NextInChain = State.Builder.CreateBinOp(
1575           (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
1576     State.set(this, NextInChain, Part, /*IsScalar*/ true);
1577   }
1578 }
1579 
1580 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1581 void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
1582                               VPSlotTracker &SlotTracker) const {
1583   O << Indent << "REDUCE ";
1584   printAsOperand(O, SlotTracker);
1585   O << " = ";
1586   getChainOp()->printAsOperand(O, SlotTracker);
1587   O << " +";
1588   if (isa<FPMathOperator>(getUnderlyingInstr()))
1589     O << getUnderlyingInstr()->getFastMathFlags();
1590   O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
1591   getVecOp()->printAsOperand(O, SlotTracker);
1592   if (getCondOp()) {
1593     O << ", ";
1594     getCondOp()->printAsOperand(O, SlotTracker);
1595   }
1596   O << ")";
1597   if (RdxDesc.IntermediateStore)
1598     O << " (with final reduction value stored in invariant address sank "
1599          "outside of loop)";
1600 }
1601 #endif
1602 
1603 bool VPReplicateRecipe::shouldPack() const {
1604   // Find if the recipe is used by a widened recipe via an intervening
1605   // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
1606   return any_of(users(), [](const VPUser *U) {
1607     if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
1608       return any_of(PredR->users(), [PredR](const VPUser *U) {
1609         return !U->usesScalars(PredR);
1610       });
1611     return false;
1612   });
1613 }
1614 
1615 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1616 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
1617                               VPSlotTracker &SlotTracker) const {
1618   O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
1619 
1620   if (!getUnderlyingInstr()->getType()->isVoidTy()) {
1621     printAsOperand(O, SlotTracker);
1622     O << " = ";
1623   }
1624   if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
1625     O << "call";
1626     printFlags(O);
1627     O << "@" << CB->getCalledFunction()->getName() << "(";
1628     interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
1629                     O, [&O, &SlotTracker](VPValue *Op) {
1630                       Op->printAsOperand(O, SlotTracker);
1631                     });
1632     O << ")";
1633   } else {
1634     O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode());
1635     printFlags(O);
1636     printOperands(O, SlotTracker);
1637   }
1638 
1639   if (shouldPack())
1640     O << " (S->V)";
1641 }
1642 #endif
1643 
1644 /// Checks if \p C is uniform across all VFs and UFs. It is considered as such
1645 /// if it is either defined outside the vector region or its operand is known to
1646 /// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
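     /// For example (illustrative), a scalar cast of the canonical IV is
     /// uniform in this sense: the canonical IV PHI is a single scalar value
     /// shared by all parts and lanes.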
1647 /// TODO: Uniformity should be associated with a VPValue and there should be a
1648 /// generic way to check.
1649 static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) {
1650   return C->isDefinedOutsideVectorRegions() ||
1651          isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
1652          isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
1653 }
1654 
1655 Value *VPScalarCastRecipe::generate(VPTransformState &State, unsigned Part) {
1656   assert(vputils::onlyFirstLaneUsed(this) &&
1657          "Codegen only implemented for first lane.");
1658   switch (Opcode) {
1659   case Instruction::SExt:
1660   case Instruction::ZExt:
1661   case Instruction::Trunc: {
1662     // Note: SExt/ZExt not used yet.
1663     Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
1664     return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
1665   }
1666   default:
1667     llvm_unreachable("opcode not implemented yet");
1668   }
1669 }
1670 
1671 void VPScalarCastRecipe::execute(VPTransformState &State) {
1672   bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
1673   for (unsigned Part = 0; Part != State.UF; ++Part) {
1674     Value *Res;
1675     // Only generate a single instance if the recipe is uniform across
1676     // VFs and UFs.
1677     if (Part > 0 && IsUniformAcrossVFsAndUFs)
1678       Res = State.get(this, VPIteration(0, 0));
1679     else
1680       Res = generate(State, Part);
1681     State.set(this, Res, VPIteration(Part, 0));
1682   }
1683 }
1684 
1685 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1686 void VPScalarCastRecipe::print(raw_ostream &O, const Twine &Indent,
1687                                 VPSlotTracker &SlotTracker) const {
1688   O << Indent << "SCALAR-CAST ";
1689   printAsOperand(O, SlotTracker);
1690   O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1691   printOperands(O, SlotTracker);
1692   O << " to " << *ResultTy;
1693 }
1694 #endif
1695 
1696 void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
1697   assert(State.Instance && "Branch on Mask works only on single instance.");
1698 
1699   unsigned Part = State.Instance->Part;
1700   unsigned Lane = State.Instance->Lane.getKnownLane();
1701 
1702   Value *ConditionBit = nullptr;
1703   VPValue *BlockInMask = getMask();
1704   if (BlockInMask) {
1705     ConditionBit = State.get(BlockInMask, Part);
1706     if (ConditionBit->getType()->isVectorTy())
1707       ConditionBit = State.Builder.CreateExtractElement(
1708           ConditionBit, State.Builder.getInt32(Lane));
1709   } else // Block in mask is all-one.
1710     ConditionBit = State.Builder.getTrue();
1711 
1712   // Replace the temporary unreachable terminator with a new conditional branch,
1713   // whose two destinations will be set later when they are created.
1714   auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
1715   assert(isa<UnreachableInst>(CurrentTerminator) &&
1716          "Expected to replace unreachable terminator with conditional branch.");
1717   auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
1718   CondBr->setSuccessor(0, nullptr);
1719   ReplaceInstWithInst(CurrentTerminator, CondBr);
1720 }
1721 
1722 void VPPredInstPHIRecipe::execute(VPTransformState &State) {
1723   assert(State.Instance && "Predicated instruction PHI works per instance.");
1724   Instruction *ScalarPredInst =
1725       cast<Instruction>(State.get(getOperand(0), *State.Instance));
1726   BasicBlock *PredicatedBB = ScalarPredInst->getParent();
1727   BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
1728   assert(PredicatingBB && "Predicated block has no single predecessor.");
1729   assert(isa<VPReplicateRecipe>(getOperand(0)) &&
1730          "operand must be VPReplicateRecipe");
1731 
1732   // By current pack/unpack logic we need to generate only a single phi node: if
1733   // a vector value for the predicated instruction exists at this point it means
1734   // the instruction has vector users only, and a phi for the vector value is
1735   // needed. In this case the recipe of the predicated instruction is marked to
1736   // also do that packing, thereby "hoisting" the insert-element sequence.
1737   // Otherwise, a phi node for the scalar value is needed.
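       // Illustrative shape of the generated IR (not from the source), for
       // the vector case with VF=4:
       //   %iei  = insertelement <4 x i32> %prev, i32 %scalar, i32 <lane>
       //   %vphi = phi <4 x i32> [ %prev, %PredicatingBB ],
       //                         [ %iei, %PredicatedBB ]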
1738   unsigned Part = State.Instance->Part;
1739   if (State.hasVectorValue(getOperand(0), Part)) {
1740     Value *VectorValue = State.get(getOperand(0), Part);
1741     InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
1742     PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
1743     VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
1744     VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
1745     if (State.hasVectorValue(this, Part))
1746       State.reset(this, VPhi, Part);
1747     else
1748       State.set(this, VPhi, Part);
1749     // NOTE: Currently we need to update the value of the operand, so the next
1750     // predicated iteration inserts its generated value in the correct vector.
1751     State.reset(getOperand(0), VPhi, Part);
1752   } else {
1753     Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
1754     PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
1755     Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
1756                      PredicatingBB);
1757     Phi->addIncoming(ScalarPredInst, PredicatedBB);
1758     if (State.hasScalarValue(this, *State.Instance))
1759       State.reset(this, Phi, *State.Instance);
1760     else
1761       State.set(this, Phi, *State.Instance);
1762     // NOTE: Currently we need to update the value of the operand, so the next
1763     // predicated iteration inserts its generated value in the correct vector.
1764     State.reset(getOperand(0), Phi, *State.Instance);
1765   }
1766 }
1767 
1768 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1769 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1770                                 VPSlotTracker &SlotTracker) const {
1771   O << Indent << "PHI-PREDICATED-INSTRUCTION ";
1772   printAsOperand(O, SlotTracker);
1773   O << " = ";
1774   printOperands(O, SlotTracker);
1775 }
1776 
1777 void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
1778                               VPSlotTracker &SlotTracker) const {
1779   O << Indent << "WIDEN ";
1780   printAsOperand(O, SlotTracker);
1781   O << " = load ";
1782   printOperands(O, SlotTracker);
1783 }
1784 
1785 void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
1786                                  VPSlotTracker &SlotTracker) const {
1787   O << Indent << "WIDEN ";
1788   printAsOperand(O, SlotTracker);
1789   O << " = vp.load ";
1790   printOperands(O, SlotTracker);
1791 }
1792 
1793 void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
1794                                VPSlotTracker &SlotTracker) const {
1795   O << Indent << "WIDEN store ";
1796   printOperands(O, SlotTracker);
1797 }
1798 
1799 void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent,
1800                                   VPSlotTracker &SlotTracker) const {
1801   O << Indent << "WIDEN vp.store ";
1802   printOperands(O, SlotTracker);
1803 }
1804 #endif
1805 
1806 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
1807   Value *Start = getStartValue()->getLiveInIRValue();
1808   PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
1809   EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1810 
1811   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1812   EntryPart->addIncoming(Start, VectorPH);
1813   EntryPart->setDebugLoc(getDebugLoc());
1814   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1815     State.set(this, EntryPart, Part, /*IsScalar*/ true);
1816 }
1817 
1818 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1819 void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1820                                    VPSlotTracker &SlotTracker) const {
1821   O << Indent << "EMIT ";
1822   printAsOperand(O, SlotTracker);
1823   O << " = CANONICAL-INDUCTION ";
1824   printOperands(O, SlotTracker);
1825 }
1826 #endif
1827 
1828 bool VPCanonicalIVPHIRecipe::isCanonical(
1829     InductionDescriptor::InductionKind Kind, VPValue *Start,
1830     VPValue *Step) const {
1831   // Must be an integer induction.
1832   if (Kind != InductionDescriptor::IK_IntInduction)
1833     return false;
1834   // Start must match the start value of this canonical induction.
1835   if (Start != getStartValue())
1836     return false;
1837 
1838   // If the step is defined by a recipe, it is not a ConstantInt.
1839   if (Step->getDefiningRecipe())
1840     return false;
1841 
1842   ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
1843   return StepC && StepC->isOne();
1844 }
1845 
1846 bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
1847   return IsScalarAfterVectorization &&
1848          (!IsScalable || vputils::onlyFirstLaneUsed(this));
1849 }
1850 
1851 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1852 void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
1853                                           VPSlotTracker &SlotTracker) const {
1854   O << Indent << "EMIT ";
1855   printAsOperand(O, SlotTracker);
1856   O << " = WIDEN-POINTER-INDUCTION ";
1857   getStartValue()->printAsOperand(O, SlotTracker);
1858   O << ", " << *IndDesc.getStep();
1859 }
1860 #endif
1861 
1862 void VPExpandSCEVRecipe::execute(VPTransformState &State) {
1863   assert(!State.Instance && "cannot be used in per-lane");
1864   const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
1865   SCEVExpander Exp(SE, DL, "induction");
1866 
1867   Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
1868                                  &*State.Builder.GetInsertPoint());
1869   assert(!State.ExpandedSCEVs.contains(Expr) &&
1870          "Same SCEV expanded multiple times");
1871   State.ExpandedSCEVs[Expr] = Res;
1872   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1873     State.set(this, Res, {Part, 0});
1874 }
1875 
1876 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1877 void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
1878                                VPSlotTracker &SlotTracker) const {
1879   O << Indent << "EMIT ";
1880   getVPSingleValue()->printAsOperand(O, SlotTracker);
1881   O << " = EXPAND SCEV " << *Expr;
1882 }
1883 #endif
1884 
1885 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
1886   Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
1887   Type *STy = CanonicalIV->getType();
1888   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
1889   ElementCount VF = State.VF;
1890   Value *VStart = VF.isScalar()
1891                       ? CanonicalIV
1892                       : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
1893   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1894     Value *VStep = createStepForVF(Builder, STy, VF, Part);
1895     if (VF.isVector()) {
1896       VStep = Builder.CreateVectorSplat(VF, VStep);
1897       VStep =
1898           Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
1899     }
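         // Worked example (illustrative): for fixed-width VF=4, Part 1 gets
         // VStep = splat(4) + <0,1,2,3> = <4,5,6,7>, so the widened IV below
         // is <iv+4, iv+5, iv+6, iv+7>.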
1900     Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
1901     State.set(this, CanonicalVectorIV, Part);
1902   }
1903 }
1904 
1905 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1906 void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
1907                                      VPSlotTracker &SlotTracker) const {
1908   O << Indent << "EMIT ";
1909   printAsOperand(O, SlotTracker);
1910   O << " = WIDEN-CANONICAL-INDUCTION ";
1911   printOperands(O, SlotTracker);
1912 }
1913 #endif
1914 
1915 void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
1916   auto &Builder = State.Builder;
1917   // Create a vector from the initial value.
1918   auto *VectorInit = getStartValue()->getLiveInIRValue();
1919 
1920   Type *VecTy = State.VF.isScalar()
1921                     ? VectorInit->getType()
1922                     : VectorType::get(VectorInit->getType(), State.VF);
1923 
1924   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1925   if (State.VF.isVector()) {
1926     auto *IdxTy = Builder.getInt32Ty();
1927     auto *One = ConstantInt::get(IdxTy, 1);
1928     IRBuilder<>::InsertPointGuard Guard(Builder);
1929     Builder.SetInsertPoint(VectorPH->getTerminator());
1930     auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
1931     auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
1932     VectorInit = Builder.CreateInsertElement(
1933         PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
1934   }
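       // E.g. (illustrative): for fixed-width VF=4 this leaves
       //   vector.recur.init = <poison, poison, poison, %init>
       // i.e. the scalar initial value sits in the last lane, where the
       // recurrence splice with the previous iteration's vector expects it.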
1935 
1936   // Create a phi node for the new recurrence.
1937   PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
1938   EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1939   EntryPart->addIncoming(VectorInit, VectorPH);
1940   State.set(this, EntryPart, 0);
1941 }
1942 
1943 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1944 void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
1945                                             VPSlotTracker &SlotTracker) const {
1946   O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
1947   printAsOperand(O, SlotTracker);
1948   O << " = phi ";
1949   printOperands(O, SlotTracker);
1950 }
1951 #endif
1952 
1953 void VPReductionPHIRecipe::execute(VPTransformState &State) {
1954   auto &Builder = State.Builder;
1955 
1956   // Reductions do not have to start at zero. They can start with
1957   // any loop-invariant value.
1958   VPValue *StartVPV = getStartValue();
1959   Value *StartV = StartVPV->getLiveInIRValue();
1960 
1961   // In order to support recurrences we need to be able to vectorize Phi nodes.
1962   // Phi nodes have cycles, so we need to vectorize them in two stages. This is
1963   // stage #1: We create a new vector PHI node with no incoming edges. We'll use
1964   // this value when we vectorize all of the instructions that use the PHI.
1965   bool ScalarPHI = State.VF.isScalar() || IsInLoop;
1966   Type *VecTy = ScalarPHI ? StartV->getType()
1967                           : VectorType::get(StartV->getType(), State.VF);
1968 
1969   BasicBlock *HeaderBB = State.CFG.PrevBB;
1970   assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
1971          "recipe must be in the vector loop header");
1972   unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
1973   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1974     Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
1975     EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
1976     State.set(this, EntryPart, Part, IsInLoop);
1977   }
1978 
1979   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1980 
1981   Value *Iden = nullptr;
1982   RecurKind RK = RdxDesc.getRecurrenceKind();
1983   if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
1984       RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
1985     // MinMax and AnyOf reductions have the start value as their identity.
1986     if (ScalarPHI) {
1987       Iden = StartV;
1988     } else {
1989       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
1990       Builder.SetInsertPoint(VectorPH->getTerminator());
1991       StartV = Iden =
1992           Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
1993     }
1994   } else {
1995     Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
1996                                          RdxDesc.getFastMathFlags());
1997 
1998     if (!ScalarPHI) {
1999       Iden = Builder.CreateVectorSplat(State.VF, Iden);
2000       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
2001       Builder.SetInsertPoint(VectorPH->getTerminator());
2002       Constant *Zero = Builder.getInt32(0);
2003       StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
2004     }
2005   }
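       // Worked example (illustrative): for an integer add reduction with
       // fixed-width VF=4, Iden is zeroinitializer and StartV becomes
       // <%start, 0, 0, 0>, so the start value is folded in exactly once.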
2006 
2007   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
2008     Value *EntryPart = State.get(this, Part, IsInLoop);
2009     // Make sure to add the reduction start value only to the
2010     // first unroll part.
2011     Value *StartVal = (Part == 0) ? StartV : Iden;
2012     cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
2013   }
2014 }
2015 
2016 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2017 void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
2018                                  VPSlotTracker &SlotTracker) const {
2019   O << Indent << "WIDEN-REDUCTION-PHI ";
2020 
2021   printAsOperand(O, SlotTracker);
2022   O << " = phi ";
2023   printOperands(O, SlotTracker);
2024 }
2025 #endif
2026 
2027 void VPWidenPHIRecipe::execute(VPTransformState &State) {
2028   assert(EnableVPlanNativePath &&
2029          "Non-native vplans are not expected to have VPWidenPHIRecipes.");
2030 
2031   Value *Op0 = State.get(getOperand(0), 0);
2032   Type *VecTy = Op0->getType();
2033   Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
2034   State.set(this, VecPhi, 0);
2035 }
2036 
2037 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2038 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
2039                              VPSlotTracker &SlotTracker) const {
2040   O << Indent << "WIDEN-PHI ";
2041 
2042   auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
2043   // Unless all incoming values are modeled in VPlan, print the original PHI
2044   // directly.
2045   // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
2046   // values as VPValues.
2047   if (getNumOperands() != OriginalPhi->getNumOperands()) {
2048     O << VPlanIngredient(OriginalPhi);
2049     return;
2050   }
2051 
2052   printAsOperand(O, SlotTracker);
2053   O << " = phi ";
2054   printOperands(O, SlotTracker);
2055 }
2056 #endif
2057 
2058 // TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2059 // remove VPActiveLaneMaskPHIRecipe.
2060 void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
2061   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2062   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
2063     Value *StartMask = State.get(getOperand(0), Part);
2064     PHINode *EntryPart =
2065         State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
2066     EntryPart->addIncoming(StartMask, VectorPH);
2067     EntryPart->setDebugLoc(getDebugLoc());
2068     State.set(this, EntryPart, Part);
2069   }
2070 }
2071 
2072 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2073 void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
2074                                       VPSlotTracker &SlotTracker) const {
2075   O << Indent << "ACTIVE-LANE-MASK-PHI ";
2076 
2077   printAsOperand(O, SlotTracker);
2078   O << " = phi ";
2079   printOperands(O, SlotTracker);
2080 }
2081 #endif
2082 
2083 void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) {
2084   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2085   assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization.");
2086   Value *Start = State.get(getOperand(0), VPIteration(0, 0));
2087   PHINode *EntryPart =
2088       State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
2089   EntryPart->addIncoming(Start, VectorPH);
2090   EntryPart->setDebugLoc(getDebugLoc());
2091   State.set(this, EntryPart, 0, /*IsScalar=*/true);
2092 }
2093 
2094 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2095 void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
2096                                   VPSlotTracker &SlotTracker) const {
2097   O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
2098 
2099   printAsOperand(O, SlotTracker);
2100   O << " = phi ";
2101   printOperands(O, SlotTracker);
2102 }
2103 #endif
2104