//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementations for different VPlan recipes.
///
//===----------------------------------------------------------------------===//

#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>

using namespace llvm;

using VectorParts = SmallVector<Value *, 2>;

namespace llvm {
extern cl::opt<bool> EnableVPlanNativePath;
}

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPInterleaveSC:
    return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return true;
  case VPReplicateSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPWidenCallSC:
    return !cast<VPWidenCallRecipe>(this)
                ->getCalledScalarFunction()
                ->onlyReadsMemory();
  case VPBranchOnMaskSC:
  case VPScalarIVStepsSC:
  case VPPredInstPHISC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
    return true;
  case VPReplicateSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPWidenCallSC:
    return !cast<VPWidenCallRecipe>(this)
                ->getCalledScalarFunction()
                ->onlyWritesMemory();
  case VPBranchOnMaskSC:
  case VPPredInstPHISC:
  case VPScalarIVStepsSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPDerivedIVSC:
  case VPPredInstPHISC:
  case VPScalarCastSC:
    return false;
  case VPInstructionSC:
    switch (cast<VPInstruction>(this)->getOpcode()) {
    case Instruction::Or:
    case Instruction::ICmp:
    case Instruction::Select:
    case VPInstruction::Not:
    case VPInstruction::CalculateTripCountMinusVF:
    case VPInstruction::CanonicalIVIncrementForPart:
    case VPInstruction::ExtractFromEnd:
    case VPInstruction::LogicalAnd:
    case VPInstruction::PtrAdd:
      return false;
    default:
      return true;
    }
  case VPWidenCallSC: {
    Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
    return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
  }
  case VPBlendSC:
  case VPReductionSC:
  case VPScalarIVStepsSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenPointerInductionSC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPInterleaveSC:
    return mayWriteToMemory();
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    assert(
        cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
            mayWriteToMemory() &&
        "mayHaveSideEffects result for ingredient differs from this "
        "implementation");
    return mayWriteToMemory();
  case VPReplicateSC: {
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    return true;
  }
}

void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
  auto Lane = VPLane::getLastLaneForVF(State.VF);
  VPValue *ExitValue = getOperand(0);
  if (vputils::isUniformAfterVectorization(ExitValue))
    Lane = VPLane::getFirstLane();
  VPBasicBlock *MiddleVPBB =
      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
  assert(MiddleVPBB->getNumSuccessors() == 0 &&
         "the middle block must not have any successors");
  BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
  Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
                   MiddleBB);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
  O << "Live-out ";
  getPhi()->printAsOperand(O);
  O << " = ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << "\n";
}
#endif

void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, InsertPos->getIterator());
}

void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);
  BB.insert(this, I);
}

void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
}

void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
}

iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}

void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}

void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}

FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
  assert(OpType == OperationType::FPMathOp &&
         "recipe doesn't have fast math flags");
  FastMathFlags Res;
  Res.setAllowReassoc(FMFs.AllowReassoc);
  Res.setNoNaNs(FMFs.NoNaNs);
  Res.setNoInfs(FMFs.NoInfs);
  Res.setNoSignedZeros(FMFs.NoSignedZeros);
  Res.setAllowReciprocal(FMFs.AllowReciprocal);
  Res.setAllowContract(FMFs.AllowContract);
  Res.setApproxFunc(FMFs.ApproxFunc);
  return Res;
}

VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
                             VPValue *A, VPValue *B, DebugLoc DL,
                             const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
                          Pred, DL),
      Opcode(Opcode), Name(Name.str()) {
  assert(Opcode == Instruction::ICmp &&
         "only ICmp predicates supported at the moment");
}

VPInstruction::VPInstruction(unsigned Opcode,
                             std::initializer_list<VPValue *> Operands,
                             FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
      Opcode(Opcode), Name(Name.str()) {
  // Make sure the VPInstruction is a floating-point operation.
  assert(isFPMathOp() && "this op can't take fast-math flags");
}

bool VPInstruction::doesGeneratePerAllLanes() const {
  return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
}

bool VPInstruction::canGenerateScalarForFirstLane() const {
  if (Instruction::isBinaryOp(getOpcode()))
    return true;
  if (isVectorToScalar())
    return true;
  switch (Opcode) {
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::PtrAdd:
  case VPInstruction::ExplicitVectorLength:
    return true;
  default:
    return false;
  }
}

Value *VPInstruction::generatePerLane(VPTransformState &State,
                                      const VPIteration &Lane) {
  IRBuilderBase &Builder = State.Builder;

  assert(getOpcode() == VPInstruction::PtrAdd &&
         "only PtrAdd opcodes are supported for now");
  return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
                              State.get(getOperand(1), Lane), Name);
}

Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
  IRBuilderBase &Builder = State.Builder;

  if (Instruction::isBinaryOp(getOpcode())) {
    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
    Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
    Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
    auto *Res =
        Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
    if (auto *I = dyn_cast<Instruction>(Res))
      setFlags(I);
    return Res;
  }

  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0), Part);
    return Builder.CreateNot(A, Name);
  }
  case Instruction::ICmp: {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    return Builder.CreateCmp(getPredicate(), A, B, Name);
  }
  case Instruction::Select: {
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    return Builder.CreateSelect(Cond, Op1, Op2, Name);
  }
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop tripcount.
    Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));

    // If this part of the active lane mask is scalar, generate the CMP directly
    // to avoid unnecessary extracts.
    if (State.VF.isScalar())
      return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
                               Name);

    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
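    // A sketch of the emitted call (assuming VF = 4 and an i64 trip count):
    //   %alm = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %iv0,
    //                                                            i64 %tc)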
    return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
                                   {PredTy, ScalarTC->getType()},
                                   {VIVElem0, ScalarTC}, nullptr, Name);
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))

    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy())
      return PartMinus1;
    Value *V2 = State.get(getOperand(1), Part);
    return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
  }
  case VPInstruction::CalculateTripCountMinusVF: {
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

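    // Emits, in effect, max(TC - VF * UF, 0); a rough sketch of the IR
    // (assuming an i64 trip count and a fixed VF * UF of 8):
    //   %0 = sub i64 %tc, 8
    //   %1 = icmp ugt i64 %tc, 8
    //   %2 = select i1 %1, i64 %0, i64 0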
    Value *ScalarTC = State.get(getOperand(0), {0, 0});
    Value *Step =
        createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
    Value *Sub = Builder.CreateSub(ScalarTC, Step);
    Value *Cmp =
        Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
    Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
    return Builder.CreateSelect(Cmp, Sub, Zero);
  }
  case VPInstruction::ExplicitVectorLength: {
    // Compute EVL
    auto GetEVL = [=](VPTransformState &State, Value *AVL) {
      assert(AVL->getType()->isIntegerTy() &&
             "Requested vector length should be an integer.");

      // TODO: Add support for MaxSafeDist for correct loop emission.
      assert(State.VF.isScalable() && "Expected scalable vector factor.");
      Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());

      Value *EVL = State.Builder.CreateIntrinsic(
          State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
          {AVL, VFArg, State.Builder.getTrue()});
      return EVL;
    };
    // TODO: Restructure this code with an explicit remainder loop, vsetvli can
    // be outside of the main loop.
    assert(Part == 0 && "No unrolling expected for predicated vectorization.");
    // Compute VTC - IV as the AVL (requested vector length).
    Value *Index = State.get(getOperand(0), VPIteration(0, 0));
    Value *TripCount = State.get(getOperand(1), VPIteration(0, 0));
    Value *AVL = State.Builder.CreateSub(TripCount, Index);
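    // A rough sketch of what GetEVL emits (assuming an i64 AVL and a scalable
    // VF with known minimum 4):
    //   %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl,
    //              i32 4, i1 true)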
    Value *EVL = GetEVL(State, AVL);
    return EVL;
  }
  case VPInstruction::CanonicalIVIncrementForPart: {
    auto *IV = State.get(getOperand(0), VPIteration(0, 0));
    if (Part == 0)
      return IV;

    // The canonical IV is incremented by the vectorization factor (num of SIMD
    // elements) times the unroll part.
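    // E.g. with VF = 4 and UF = 2, part 1 emits %iv + 4 for fixed-width
    // vectors (or %iv + 4 * vscale for scalable ones).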
    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
                             hasNoSignedWrap());
  }
  case VPInstruction::BranchOnCond: {
    if (Part != 0)
      return nullptr;

    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination for exiting blocks now and
    // to forward destination(s) later when they are created.
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);

    if (getParent()->isExiting())
      CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);

    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
  case VPInstruction::BranchOnCount: {
    if (Part != 0)
      return nullptr;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
    Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination (the header) now and to the
    // forward destination (the exit/middle block) later when it is created.
    // Note that CreateCondBr expects a valid BB as first argument, so we need
    // to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
  case VPInstruction::ComputeReductionResult: {
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
    // and will be removed by breaking up the recipe further.
    auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
    auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
    // Get its reduction variable descriptor.
    const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();

    RecurKind RK = RdxDesc.getRecurrenceKind();

    VPValue *LoopExitingDef = getOperand(1);
    Type *PhiTy = OrigPhi->getType();
    VectorParts RdxParts(State.UF);
    for (unsigned Part = 0; Part < State.UF; ++Part)
      RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());

    // If the vector reduction can be performed in a smaller type, we truncate
    // then extend the loop exit value to enable InstCombine to evaluate the
    // entire expression in the smaller type.
    // TODO: Handle this in truncateToMinBW.
    if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
      Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
      for (unsigned Part = 0; Part < State.UF; ++Part)
        RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
    }
    // Reduce all of the unrolled parts into a single vector.
    Value *ReducedPartRdx = RdxParts[0];
    unsigned Op = RecurrenceDescriptor::getOpcode(RK);
    if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
      Op = Instruction::Or;

    if (PhiR->isOrdered()) {
      ReducedPartRdx = RdxParts[State.UF - 1];
    } else {
      // Floating-point operations should have some FMF to enable the reduction.
      IRBuilderBase::FastMathFlagGuard FMFG(Builder);
      Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
      for (unsigned Part = 1; Part < State.UF; ++Part) {
        Value *RdxPart = RdxParts[Part];
        if (Op != Instruction::ICmp && Op != Instruction::FCmp)
          ReducedPartRdx = Builder.CreateBinOp(
              (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
        else
          ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
      }
    }
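    // E.g. for an integer add reduction with VF = 4 and UF = 2, the two parts
    // are folded into
    //   %bin.rdx = add <4 x i32> %rdx.part1, %rdx.part0
    // before the final horizontal reduction below.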

    // Create the reduction after the loop. Note that in-loop reductions create
    // the target reduction in the loop using a Reduction recipe.
    if ((State.VF.isVector() ||
         RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
        !PhiR->isInLoop()) {
      ReducedPartRdx =
          createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
      // If the reduction can be performed in a smaller type, we need to extend
      // the reduction to the wider type before we branch to the original loop.
      if (PhiTy != RdxDesc.getRecurrenceType())
        ReducedPartRdx = RdxDesc.isSigned()
                             ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
                             : Builder.CreateZExt(ReducedPartRdx, PhiTy);
    }

    // If there were stores of the reduction value to a uniform memory address
    // inside the loop, create the final store here.
    if (StoreInst *SI = RdxDesc.IntermediateStore) {
      auto *NewSI = Builder.CreateAlignedStore(
          ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
      propagateMetadata(NewSI, SI);
    }

    return ReducedPartRdx;
  }
  case VPInstruction::ExtractFromEnd: {
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
    unsigned Offset = CI->getZExtValue();
    assert(Offset > 0 && "Offset from end must be positive");
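    // E.g. Offset == 1 yields the last element; a rough sketch for the vector
    // case with VF = 4, UF = 2 and an i32 operand:
    //   %res = extractelement <4 x i32> %part1, i32 3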
    Value *Res;
    if (State.VF.isVector()) {
      assert(Offset <= State.VF.getKnownMinValue() &&
             "invalid offset to extract from");
      // Extract lane VF - Offset from the operand.
      Res = State.get(
          getOperand(0),
          VPIteration(State.UF - 1, VPLane::getLaneFromEnd(State.VF, Offset)));
    } else {
      assert(Offset <= State.UF && "invalid offset to extract from");
      // When loop is unrolled without vectorizing, retrieve UF - Offset.
      Res = State.get(getOperand(0), State.UF - Offset);
    }
    Res->setName(Name);
    return Res;
  }
  case VPInstruction::LogicalAnd: {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    return Builder.CreateLogicalAnd(A, B, Name);
  }
  case VPInstruction::PtrAdd: {
    assert(vputils::onlyFirstLaneUsed(this) &&
           "can only generate first lane for PtrAdd");
    Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
    Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
    return Builder.CreatePtrAdd(Ptr, Addend, Name);
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}

bool VPInstruction::isVectorToScalar() const {
  return getOpcode() == VPInstruction::ExtractFromEnd ||
         getOpcode() == VPInstruction::ComputeReductionResult;
}

#if !defined(NDEBUG)
bool VPInstruction::isFPMathOp() const {
  // Inspired by FPMathOperator::classof. Notable differences are that we don't
  // support Call and PHI opcodes here yet.
  return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
         Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
         Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
         Opcode == Instruction::FCmp || Opcode == Instruction::Select;
}
#endif

void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Instance && "VPInstruction executing an Instance");
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  assert((hasFastMathFlags() == isFPMathOp() ||
          getOpcode() == Instruction::Select) &&
         "Recipe not a FPMathOp but has fast-math flags?");
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());
  State.setDebugLocFrom(getDebugLoc());
  bool GeneratesPerFirstLaneOnly =
      canGenerateScalarForFirstLane() &&
      (vputils::onlyFirstLaneUsed(this) || isVectorToScalar());
  bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
  bool OnlyFirstPartUsed = vputils::onlyFirstPartUsed(this);
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    if (GeneratesPerAllLanes) {
      for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
           Lane != NumLanes; ++Lane) {
        Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
        assert(GeneratedValue && "generatePerLane must produce a value");
        State.set(this, GeneratedValue, VPIteration(Part, Lane));
      }
      continue;
    }

    if (Part != 0 && OnlyFirstPartUsed && hasResult()) {
      Value *Part0 = State.get(this, 0, /*IsScalar*/ GeneratesPerFirstLaneOnly);
      State.set(this, Part0, Part,
                /*IsScalar*/ GeneratesPerFirstLaneOnly);
      continue;
    }

    Value *GeneratedValue = generatePerPart(State, Part);
    if (!hasResult())
      continue;
    assert(GeneratedValue && "generatePerPart must produce a value");
    assert((GeneratedValue->getType()->isVectorTy() ==
                !GeneratesPerFirstLaneOnly ||
            State.VF.isScalar()) &&
           "scalar value but not only first lane defined");
    State.set(this, GeneratedValue, Part,
              /*IsScalar*/ GeneratesPerFirstLaneOnly);
  }
}

bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  if (Instruction::isBinaryOp(getOpcode()))
    return vputils::onlyFirstLaneUsed(this);

  switch (getOpcode()) {
  default:
    return false;
  case Instruction::ICmp:
  case VPInstruction::PtrAdd:
    // TODO: Cover additional opcodes.
    return vputils::onlyFirstLaneUsed(this);
  case VPInstruction::ActiveLaneMask:
  case VPInstruction::ExplicitVectorLength:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::BranchOnCount:
    return true;
  }
  llvm_unreachable("switch should return");
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
  print(dbgs(), "", SlotTracker);
}

void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::ExplicitVectorLength:
    O << "EXPLICIT-VECTOR-LENGTH";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::CalculateTripCountMinusVF:
    O << "TC > VF ? TC - VF : 0";
    break;
  case VPInstruction::CanonicalIVIncrementForPart:
    O << "VF * Part +";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count";
    break;
  case VPInstruction::ExtractFromEnd:
    O << "extract-from-end";
    break;
  case VPInstruction::ComputeReductionResult:
    O << "compute-reduction-result";
    break;
  case VPInstruction::LogicalAnd:
    O << "logical-and";
    break;
  case VPInstruction::PtrAdd:
    O << "ptradd";
    break;
  default:
    O << Instruction::getOpcodeName(getOpcode());
  }

  printFlags(O);
  printOperands(O, SlotTracker);

  if (auto DL = getDebugLoc()) {
    O << ", !dbg ";
    DL.print(O);
  }
}
#endif

void VPWidenCallRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  Function *CalledScalarFn = getCalledScalarFunction();
  assert(!isDbgInfoIntrinsic(CalledScalarFn->getIntrinsicID()) &&
         "DbgInfoIntrinsic should have been dropped during VPlan construction");
  State.setDebugLocFrom(getDebugLoc());

  bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
  FunctionType *VFTy = nullptr;
  if (Variant)
    VFTy = Variant->getFunctionType();
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    SmallVector<Type *, 2> TysForDecl;
    // Add return type if intrinsic is overloaded on it.
    if (UseIntrinsic &&
        isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
      TysForDecl.push_back(VectorType::get(
          CalledScalarFn->getReturnType()->getScalarType(), State.VF));
    SmallVector<Value *, 4> Args;
    for (const auto &I : enumerate(arg_operands())) {
      // Some intrinsics have a scalar argument - don't replace it with a
      // vector.
      Value *Arg;
      if (UseIntrinsic &&
          isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
        Arg = State.get(I.value(), VPIteration(0, 0));
      // Some vectorized function variants may also take a scalar argument,
      // e.g. linear parameters for pointers. This needs to be the scalar value
      // from the start of the respective part when interleaving.
      else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
        Arg = State.get(I.value(), VPIteration(Part, 0));
      else
        Arg = State.get(I.value(), Part);
      if (UseIntrinsic &&
          isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
        TysForDecl.push_back(Arg->getType());
      Args.push_back(Arg);
    }

    Function *VectorF;
    if (UseIntrinsic) {
      // Use vector version of the intrinsic.
      Module *M = State.Builder.GetInsertBlock()->getModule();
      VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
      assert(VectorF && "Can't retrieve vector intrinsic.");
    } else {
      assert(Variant != nullptr && "Can't create vector function.");
      VectorF = Variant;
    }

    auto *CI = cast_or_null<CallInst>(getUnderlyingInstr());
    SmallVector<OperandBundleDef, 1> OpBundles;
    if (CI)
      CI->getOperandBundlesAsDefs(OpBundles);

    CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);

    if (isa<FPMathOperator>(V))
      V->copyFastMathFlags(CI);

    if (!V->getType()->isVoidTy())
      State.set(this, V, Part);
    State.addMetadata(V, CI);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  Function *CalledFn = getCalledScalarFunction();
  if (CalledFn->getReturnType()->isVoidTy())
    O << "void ";
  else {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  O << "call @" << CalledFn->getName() << "(";
  interleaveComma(arg_operands(), O, [&O, &SlotTracker](VPValue *Op) {
    Op->printAsOperand(O, SlotTracker);
  });
  O << ")";

  if (VectorIntrinsicID)
    O << " (using vector intrinsic)";
  else {
    O << " (using library function";
    if (Variant->hasName())
      O << ": " << Variant->getName();
    O << ")";
  }
}

void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-SELECT ";
  printAsOperand(O, SlotTracker);
  O << " = select ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(2)->printAsOperand(O, SlotTracker);
  O << (isInvariantCond() ? " (condition is loop invariant)" : "");
}
#endif

void VPWidenSelectRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());

  // The condition can be loop invariant but still defined inside the
  // loop. This means that we can't just use the original 'cond' value.
  // We have to take the 'vectorized' value and pick the first lane.
  // Instcombine will make this a no-op.
  auto *InvarCond =
      isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
    Value *Op0 = State.get(getOperand(1), Part);
    Value *Op1 = State.get(getOperand(2), Part);
    Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
    State.set(this, Sel, Part);
    State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
  }
}

VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
    const FastMathFlags &FMF) {
  AllowReassoc = FMF.allowReassoc();
  NoNaNs = FMF.noNaNs();
  NoInfs = FMF.noInfs();
  NoSignedZeros = FMF.noSignedZeros();
  AllowReciprocal = FMF.allowReciprocal();
  AllowContract = FMF.allowContract();
  ApproxFunc = FMF.approxFunc();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
  switch (OpType) {
  case OperationType::Cmp:
    O << " " << CmpInst::getPredicateName(getPredicate());
    break;
  case OperationType::DisjointOp:
    if (DisjointFlags.IsDisjoint)
      O << " disjoint";
    break;
  case OperationType::PossiblyExactOp:
    if (ExactFlags.IsExact)
      O << " exact";
    break;
  case OperationType::OverflowingBinOp:
    if (WrapFlags.HasNUW)
      O << " nuw";
    if (WrapFlags.HasNSW)
      O << " nsw";
    break;
  case OperationType::FPMathOp:
    getFastMathFlags().print(O);
    break;
  case OperationType::GEPOp:
    if (GEPFlags.IsInBounds)
      O << " inbounds";
    break;
  case OperationType::NonNegOp:
    if (NonNegFlags.NonNeg)
      O << " nneg";
    break;
  case OperationType::Other:
    break;
  }
  if (getNumOperands() > 0)
    O << " ";
}
#endif

void VPWidenRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  switch (Opcode) {
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Just widen unops and binops.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      SmallVector<Value *, 2> Ops;
      for (VPValue *VPOp : operands())
        Ops.push_back(State.get(VPOp, Part));

      Value *V = Builder.CreateNAryOp(Opcode, Ops);

      if (auto *VecOp = dyn_cast<Instruction>(V))
        setFlags(VecOp);

      // Use this vector value for all users of the original instruction.
      State.set(this, V, Part);
      State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  case Instruction::Freeze: {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *Op = State.get(getOperand(0), Part);

      Value *Freeze = Builder.CreateFreeze(Op);
      State.set(this, Freeze, Part);
    }
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Widen compares. Generate vector compares.
    bool FCmp = Opcode == Instruction::FCmp;
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *B = State.get(getOperand(1), Part);
      Value *C = nullptr;
      if (FCmp) {
        // Propagate fast math flags.
        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
        if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
          Builder.setFastMathFlags(I->getFastMathFlags());
        C = Builder.CreateFCmp(getPredicate(), A, B);
      } else {
        C = Builder.CreateICmp(getPredicate(), A, B);
      }
      State.set(this, C, Part);
      State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  default:
    // This instruction is not vectorized by simple widening.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
                      << Instruction::getOpcodeName(Opcode));
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.

#if !defined(NDEBUG)
  // Verify that VPlan type inference results agree with the type of the
  // generated values.
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    assert(VectorType::get(State.TypeAnalysis.inferScalarType(this),
                           State.VF) == State.get(this, Part)->getType() &&
           "inferred type and type from generated instructions do not match");
  }
#endif
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = " << Instruction::getOpcodeName(Opcode);
  printFlags(O);
  printOperands(O, SlotTracker);
}
#endif

void VPWidenCastRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  // Vectorize casts.
  assert(State.VF.isVector() && "Not vectorizing?");
  Type *DestTy = VectorType::get(getResultType(), State.VF);
  VPValue *Op = getOperand(0);
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    if (Part > 0 && Op->isLiveIn()) {
      // FIXME: Remove once explicit unrolling is implemented using VPlan.
      State.set(this, State.get(this, 0), Part);
      continue;
    }
    Value *A = State.get(Op, Part);
    Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
    State.set(this, Cast, Part);
    State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CAST ";
  printAsOperand(O, SlotTracker);
  O << " = " << Instruction::getOpcodeName(Opcode) << " ";
  printFlags(O);
  printOperands(O, SlotTracker);
  O << " to " << *getResultType();
}
#endif

/// This function adds
/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
/// to each vector element of Val. The sequence starts at StartIdx.
/// \p BinOp is relevant for FP induction variables.
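/// For example, for an integer induction with VF = 4, StartIdx = 0 and
/// Step = 2, this returns Val + <0, 2, 4, 6>.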
static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
                            Instruction::BinaryOps BinOp, ElementCount VF,
                            IRBuilderBase &Builder) {
  assert(VF.isVector() && "only vector VFs are supported");

  // Create and check the types.
  auto *ValVTy = cast<VectorType>(Val->getType());
  ElementCount VLen = ValVTy->getElementCount();

  Type *STy = Val->getType()->getScalarType();
  assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
         "Induction Step must be an integer or FP");
  assert(Step->getType() == STy && "Step has wrong type");

  SmallVector<Constant *, 8> Indices;

  // Create a vector of consecutive numbers from zero to VF.
  VectorType *InitVecValVTy = ValVTy;
  if (STy->isFloatingPointTy()) {
    Type *InitVecValSTy =
        IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
    InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
  }
  Value *InitVec = Builder.CreateStepVector(InitVecValVTy);

  // Splat the StartIdx
  Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);

  if (STy->isIntegerTy()) {
    InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
    Step = Builder.CreateVectorSplat(VLen, Step);
    assert(Step->getType() == Val->getType() && "Invalid step vec");
    // FIXME: The newly created binary instructions should contain nsw/nuw
    // flags, which can be found from the original scalar operations.
    Step = Builder.CreateMul(InitVec, Step);
    return Builder.CreateAdd(Val, Step, "induction");
  }

  // Floating point induction.
  assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
         "Binary Opcode should be specified for FP induction");
  InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
  InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);

  Step = Builder.CreateVectorSplat(VLen, Step);
  Value *MulOp = Builder.CreateFMul(InitVec, Step);
  return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
}

/// A helper function that returns an integer or floating-point constant with
/// value C.
static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
  return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
                           : ConstantFP::get(Ty, C);
}

static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,
                                  ElementCount VF) {
  assert(FTy->isFloatingPointTy() && "Expected floating point type!");
  Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
  Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
  return B.CreateUIToFP(RuntimeVF, FTy);
}

void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Int or FP induction being replicated.");

  Value *Start = getStartValue()->getLiveInIRValue();
  const InductionDescriptor &ID = getInductionDescriptor();
  TruncInst *Trunc = getTruncInst();
  IRBuilderBase &Builder = State.Builder;
  assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
  assert(State.VF.isVector() && "must have vector VF");

  // The value from the original loop to which we are mapping the new induction
  // variable.
  Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;

  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
    Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());

  // Now do the actual transformations, and start with fetching the step value.
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));

  assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
         "Expected either an induction phi-node or a truncate of it!");

  // Construct the initial value of the vector IV in the vector loop preheader
  auto CurrIP = Builder.saveIP();
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  Builder.SetInsertPoint(VectorPH->getTerminator());
  if (isa<TruncInst>(EntryVal)) {
    assert(Start->getType()->isIntegerTy() &&
           "Truncation requires an integer type");
    auto *TruncType = cast<IntegerType>(EntryVal->getType());
    Step = Builder.CreateTrunc(Step, TruncType);
    Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
  }

  Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
  Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
  Value *SteppedStart = getStepVector(
      SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);

  // We create vector phi nodes for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (Step->getType()->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = ID.getInductionOpcode();
    MulOp = Instruction::FMul;
  }

  // Multiply the vectorization factor by the step using integer or
  // floating-point arithmetic as appropriate.
  Type *StepType = Step->getType();
  Value *RuntimeVF;
  if (Step->getType()->isFloatingPointTy())
    RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
  else
    RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
  Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);

  // Create a vector splat to use in the induction update.
  //
  // FIXME: If the step is non-constant, we create the vector splat with
  //        IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
  //        handle a constant vector splat.
  Value *SplatVF = isa<Constant>(Mul)
                       ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
                       : Builder.CreateVectorSplat(State.VF, Mul);
  Builder.restoreIP(CurrIP);

  // We may need to add the step a number of times, depending on the unroll
  // factor. The last of those goes into the PHI.
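  // A sketch of the emitted IR (assuming an i32 IV, VF = 4, UF = 2, with
  // %vf.x.step splatting VF * Step):
  //   %vec.ind = phi <4 x i32> [ %induction, %vector.ph ],
  //                            [ %vec.ind.next, %vector.body ]    ; part 0
  //   %step.add = add <4 x i32> %vec.ind, %vf.x.step              ; part 1
  //   %vec.ind.next = add <4 x i32> %step.add, %vf.x.step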
  PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
  VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  VecInd->setDebugLoc(EntryVal->getDebugLoc());
  Instruction *LastInduction = VecInd;
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    State.set(this, LastInduction, Part);

    if (isa<TruncInst>(EntryVal))
      State.addMetadata(LastInduction, EntryVal);

    LastInduction = cast<Instruction>(
        Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
    LastInduction->setDebugLoc(EntryVal->getDebugLoc());
  }

  LastInduction->setName("vec.ind.next");
  VecInd->addIncoming(SteppedStart, VectorPH);
  // Add induction update using an incorrect block temporarily. The phi node
  // will be fixed after VPlan execution. Note that at this point the latch
  // block cannot be used, as it does not exist yet.
  // TODO: Model increment value in VPlan, by turning the recipe into a
  // multi-def and a subclass of VPHeaderPHIRecipe.
  VecInd->addIncoming(LastInduction, VectorPH);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-INDUCTION";
  if (getTruncInst()) {
    O << "\\l\"";
    O << " +\n" << Indent << "\"  " << VPlanIngredient(IV) << "\\l\"";
    O << " +\n" << Indent << "\"  ";
    getVPValue(0)->printAsOperand(O, SlotTracker);
  } else
    O << " " << VPlanIngredient(IV);

  O << ", ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
#endif

bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
  // The step may be defined by a recipe in the preheader (e.g. if it requires
  // SCEV expansion), but for the canonical induction the step is required to be
  // 1, which is represented as live-in.
  if (getStepValue()->getDefiningRecipe())
    return false;
  auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
  auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
  auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
  return StartC && StartC->isZero() && StepC && StepC->isOne() &&
         getScalarType() == CanIV->getScalarType();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = DERIVED-IV ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << " + ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << " * ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
#endif

void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());

  // Compute scalar induction steps. BaseIV is the scalar induction variable on
  // which to base the steps; Step is the size of the step.

  Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));
  IRBuilderBase &Builder = State.Builder;

  // Ensure step has the same type as that of scalar IV.
  Type *BaseIVTy = BaseIV->getType()->getScalarType();
  assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");

  // We build scalar steps for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (BaseIVTy->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = InductionOpcode;
    MulOp = Instruction::FMul;
  }

  // Determine the number of scalars we need to generate for each unroll
  // iteration.
  bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
  // Compute the scalar steps and save the results in State.
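  // In effect, lane L of part P receives BaseIV + (P * VF + L) * Step, with
  // AddOp/MulOp standing in for +/* in the floating-point case.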
1321   Type *IntStepTy =
1322       IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
1323   Type *VecIVTy = nullptr;
1324   Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
1325   if (!FirstLaneOnly && State.VF.isScalable()) {
1326     VecIVTy = VectorType::get(BaseIVTy, State.VF);
1327     UnitStepVec =
1328         Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
1329     SplatStep = Builder.CreateVectorSplat(State.VF, Step);
1330     SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
1331   }
1332 
1333   unsigned StartPart = 0;
1334   unsigned EndPart = State.UF;
1335   unsigned StartLane = 0;
1336   unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1337   if (State.Instance) {
1338     StartPart = State.Instance->Part;
1339     EndPart = StartPart + 1;
1340     StartLane = State.Instance->Lane.getKnownLane();
1341     EndLane = StartLane + 1;
1342   }
1343   for (unsigned Part = StartPart; Part < EndPart; ++Part) {
1344     Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
1345 
1346     if (!FirstLaneOnly && State.VF.isScalable()) {
1347       auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
1348       auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
1349       if (BaseIVTy->isFloatingPointTy())
1350         InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
1351       auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
1352       auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
1353       State.set(this, Add, Part);
1354       // It's useful to record the lane values too for the known minimum number
1355       // of elements so we do those below. This improves the code quality when
1356       // trying to extract the first element, for example.
1357     }
1358 
1359     if (BaseIVTy->isFloatingPointTy())
1360       StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
1361 
1362     for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
1363       Value *StartIdx = Builder.CreateBinOp(
1364           AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
1365       // The step returned by `createStepForVF` is a runtime-evaluated value
1366       // when VF is scalable. Otherwise, it should be folded into a Constant.
1367       assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
1368              "Expected StartIdx to be folded to a constant when VF is not "
1369              "scalable");
1370       auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
1371       auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1372       State.set(this, Add, VPIteration(Part, Lane));
1373     }
1374   }
1375 }
1376 
1377 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1378 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
1379                                   VPSlotTracker &SlotTracker) const {
1380   O << Indent;
1381   printAsOperand(O, SlotTracker);
1382   O << " = SCALAR-STEPS ";
1383   printOperands(O, SlotTracker);
1384 }
1385 #endif
1386 
1387 void VPWidenGEPRecipe::execute(VPTransformState &State) {
1388   assert(State.VF.isVector() && "not widening");
1389   auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1390   // Construct a vector GEP by widening the operands of the scalar GEP as
1391   // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1392   // results in a vector of pointers when at least one operand of the GEP
1393   // is vector-typed. Thus, to keep the representation compact, we only use
1394   // vector-typed operands for loop-varying values.
1395 
1396   if (areAllOperandsInvariant()) {
1397     // If we are vectorizing, but the GEP has only loop-invariant operands,
1398     // the GEP we build (by only using vector-typed operands for
1399     // loop-varying values) would be a scalar pointer. Thus, to ensure we
1400     // produce a vector of pointers, we need to either arbitrarily pick an
1401     // operand to broadcast, or broadcast a clone of the original GEP.
1402     // Here, we broadcast a clone of the original.
1403     //
1404     // TODO: If at some point we decide to scalarize instructions having
1405     //       loop-invariant operands, this special case will no longer be
1406     //       required. We would add the scalarization decision to
1407     //       collectLoopScalars() and teach getVectorValue() to broadcast
1408     //       the lane-zero scalar value.
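    // For example (illustrative IR, hypothetical names), the clone
    //   %g = getelementptr i32, ptr %base, i64 %inv
    // is generated once and then splatted to a <VF x ptr> vector below.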
1409     SmallVector<Value *> Ops;
1410     for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1411       Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));
1412 
1413     auto *NewGEP =
1414         State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
1415                                 ArrayRef(Ops).drop_front(), "", isInBounds());
1416     for (unsigned Part = 0; Part < State.UF; ++Part) {
1417       Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1418       State.set(this, EntryPart, Part);
1419       State.addMetadata(EntryPart, GEP);
1420     }
1421   } else {
1422     // If the GEP has at least one loop-varying operand, we are sure to
1423     // produce a vector of pointers. But if we are only unrolling, we want
1424     // to produce a scalar GEP for each unroll part. Thus, the GEP we
1425     // produce with the code below will be scalar (if VF == 1) or vector
1426     // (otherwise). Note that for the unroll-only case, we still maintain
1427     // values in the vector mapping with initVector, as we do for other
1428     // instructions.
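    // For example (illustrative IR, hypothetical names), a loop-varying index
    // directly yields a vector of pointers:
    //   %gep = getelementptr i32, ptr %base, <4 x i64> %vec.ind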
1429     for (unsigned Part = 0; Part < State.UF; ++Part) {
1430       // The pointer operand of the new GEP. If it's loop-invariant, we
1431       // won't broadcast it.
1432       auto *Ptr = isPointerLoopInvariant()
1433                       ? State.get(getOperand(0), VPIteration(0, 0))
1434                       : State.get(getOperand(0), Part);
1435 
1436       // Collect all the indices for the new GEP. If any index is
1437       // loop-invariant, we won't broadcast it.
1438       SmallVector<Value *, 4> Indices;
1439       for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1440         VPValue *Operand = getOperand(I);
1441         if (isIndexLoopInvariant(I - 1))
1442           Indices.push_back(State.get(Operand, VPIteration(0, 0)));
1443         else
1444           Indices.push_back(State.get(Operand, Part));
1445       }
1446 
1447       // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
1448       // but it should be a vector otherwise.
1449       auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
1450                                              Indices, "", isInBounds());
1451       assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
1452              "NewGEP is not a pointer vector");
1453       State.set(this, NewGEP, Part);
1454       State.addMetadata(NewGEP, GEP);
1455     }
1456   }
1457 }
1458 
1459 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1460 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
1461                              VPSlotTracker &SlotTracker) const {
1462   O << Indent << "WIDEN-GEP ";
1463   O << (isPointerLoopInvariant() ? "Inv" : "Var");
1464   for (size_t I = 0; I < getNumOperands() - 1; ++I)
1465     O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
1466 
1467   O << " ";
1468   printAsOperand(O, SlotTracker);
1469   O << " = getelementptr";
1470   printFlags(O);
1471   printOperands(O, SlotTracker);
1472 }
1473 #endif
1474 
1475 void VPVectorPointerRecipe::execute(VPTransformState &State) {
1476   auto &Builder = State.Builder;
1477   State.setDebugLocFrom(getDebugLoc());
1478   for (unsigned Part = 0; Part < State.UF; ++Part) {
1479     // Calculate the pointer for the specific unroll-part.
1480     Value *PartPtr = nullptr;
1481     // Use i32 for the gep index type when the value is constant,
1482     // or query DataLayout for a more suitable index type otherwise.
1483     const DataLayout &DL =
1484         Builder.GetInsertBlock()->getModule()->getDataLayout();
1485     Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
1486                         ? DL.getIndexType(IndexedTy->getPointerTo())
1487                         : Builder.getInt32Ty();
1488     Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
1489     bool InBounds = isInBounds();
1490     if (IsReverse) {
1491       // If the address is consecutive but reversed, then the
1492       // wide store needs to start at the last vector element.
1493       // RunTimeVF = VScale * VF.getKnownMinValue()
1494       // For fixed-width vectors VScale is 1, so RunTimeVF = VF.getKnownMinValue().
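      // E.g. (illustrative) for fixed VF=4 and Part 0: NumElt = 0 and
      // LastLane = -3, so PartPtr addresses the reversed group Ptr[-3..0].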
1495       Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
1496       // NumElt = -Part * RunTimeVF
1497       Value *NumElt = Builder.CreateMul(
1498           ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
1499       // LastLane = 1 - RunTimeVF
1500       Value *LastLane =
1501           Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
1502       PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
1503       PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds);
1504     } else {
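      // Consecutive access: part n starts n * RunTimeVF elements past Ptr,
      // e.g. (illustrative) Part 2 with fixed VF=4 starts at element 8.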
1505       Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
1506       PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
1507     }
1508 
1509     State.set(this, PartPtr, Part, /*IsScalar*/ true);
1510   }
1511 }
1512 
1513 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1514 void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
1515                                   VPSlotTracker &SlotTracker) const {
1516   O << Indent;
1517   printAsOperand(O, SlotTracker);
1518   O << " = vector-pointer ";
1519   if (IsReverse)
1520     O << "(reverse) ";
1521 
1522   printOperands(O, SlotTracker);
1523 }
1524 #endif
1525 
1526 void VPBlendRecipe::execute(VPTransformState &State) {
1527   State.setDebugLocFrom(getDebugLoc());
1528   // We know that all PHIs in non-header blocks are converted into
1529   // selects, so we don't have to worry about the insertion order and we
1530   // can just use the builder.
1531   // At this point we generate the predication tree. There may be
1532   // duplications since this is a simple recursive scan, but future
1533   // optimizations will clean it up.
1534 
1535   unsigned NumIncoming = getNumIncomingValues();
1536 
1537   // Generate a sequence of selects of the form:
1538   // SELECT(Mask3, In3,
1539   //        SELECT(Mask2, In2,
1540   //               SELECT(Mask1, In1,
1541   //                      In0)))
1542   // Note that Mask0 is never used: lanes for which no path reaches this phi
1543   // (and which are therefore essentially undef) are taken from In0.
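  // For example (illustrative IR, hypothetical names), with two incoming
  // values and VF=4 a single select is generated per part:
  //   %predphi = select <4 x i1> %mask1, <4 x i32> %in1, <4 x i32> %in0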
1544   VectorParts Entry(State.UF);
1545   bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
1546   for (unsigned In = 0; In < NumIncoming; ++In) {
1547     for (unsigned Part = 0; Part < State.UF; ++Part) {
1548       // We might have single-edge PHIs (blocks); use an identity
1549       // 'select' for the first PHI operand.
1550       Value *In0 = State.get(getIncomingValue(In), Part, OnlyFirstLaneUsed);
1551       if (In == 0)
1552         Entry[Part] = In0; // Initialize with the first incoming value.
1553       else {
1554         // Select between the current value and the previous incoming edge
1555         // based on the incoming mask.
1556         Value *Cond = State.get(getMask(In), Part, OnlyFirstLaneUsed);
1557         Entry[Part] =
1558             State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
1559       }
1560     }
1561   }
1562   for (unsigned Part = 0; Part < State.UF; ++Part)
1563     State.set(this, Entry[Part], Part, OnlyFirstLaneUsed);
1564 }
1565 
1566 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1567 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
1568                           VPSlotTracker &SlotTracker) const {
1569   O << Indent << "BLEND ";
1570   printAsOperand(O, SlotTracker);
1571   O << " =";
1572   if (getNumIncomingValues() == 1) {
1573     // Not a User of any mask: not really blending, this is a
1574     // single-predecessor phi.
1575     O << " ";
1576     getIncomingValue(0)->printAsOperand(O, SlotTracker);
1577   } else {
1578     for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
1579       O << " ";
1580       getIncomingValue(I)->printAsOperand(O, SlotTracker);
1581       if (I == 0)
1582         continue;
1583       O << "/";
1584       getMask(I)->printAsOperand(O, SlotTracker);
1585     }
1586   }
1587 }
1588 #endif
1589 
1590 void VPReductionRecipe::execute(VPTransformState &State) {
1591   assert(!State.Instance && "Reduction being replicated.");
1592   Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
1593   RecurKind Kind = RdxDesc.getRecurrenceKind();
1594   // Propagate the fast-math flags carried by the underlying instruction.
1595   IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
1596   State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
1597   for (unsigned Part = 0; Part < State.UF; ++Part) {
1598     Value *NewVecOp = State.get(getVecOp(), Part);
1599     if (VPValue *Cond = getCondOp()) {
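      // Masked-off lanes are replaced with the reduction identity so they do
      // not affect the reduced value, e.g. (illustrative) for an integer add
      // reduction:
      //   select <4 x i1> %cond, <4 x i32> %vec.op, <4 x i32> zeroinitializer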
1600       Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
1601       VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
1602       Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
1603       Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
1604                                                   RdxDesc.getFastMathFlags());
1605       if (State.VF.isVector()) {
1606         Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
1607       }
1608 
1609       Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
1610       NewVecOp = Select;
1611     }
1612     Value *NewRed;
1613     Value *NextInChain;
1614     if (IsOrdered) {
1615       if (State.VF.isVector())
1616         NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
1617                                         PrevInChain);
1618       else
1619         NewRed = State.Builder.CreateBinOp(
1620             (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
1621             NewVecOp);
1622       PrevInChain = NewRed;
1623     } else {
1624       PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
1625       NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
1626     }
1627     if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
1628       NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
1629                                    NewRed, PrevInChain);
1630     } else if (IsOrdered)
1631       NextInChain = NewRed;
1632     else
1633       NextInChain = State.Builder.CreateBinOp(
1634           (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
1635     State.set(this, NextInChain, Part, /*IsScalar*/ true);
1636   }
1637 }
1638 
1639 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1640 void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
1641                               VPSlotTracker &SlotTracker) const {
1642   O << Indent << "REDUCE ";
1643   printAsOperand(O, SlotTracker);
1644   O << " = ";
1645   getChainOp()->printAsOperand(O, SlotTracker);
1646   O << " +";
1647   if (isa<FPMathOperator>(getUnderlyingInstr()))
1648     O << getUnderlyingInstr()->getFastMathFlags();
1649   O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
1650   getVecOp()->printAsOperand(O, SlotTracker);
1651   if (getCondOp()) {
1652     O << ", ";
1653     getCondOp()->printAsOperand(O, SlotTracker);
1654   }
1655   O << ")";
1656   if (RdxDesc.IntermediateStore)
1657     O << " (with final reduction value stored in invariant address sunk "
1658          "outside of loop)";
1659 }
1660 #endif
1661 
1662 bool VPReplicateRecipe::shouldPack() const {
1663   // Find if the recipe is used by a widened recipe via an intervening
1664   // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
1665   return any_of(users(), [](const VPUser *U) {
1666     if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
1667       return any_of(PredR->users(), [PredR](const VPUser *U) {
1668         return !U->usesScalars(PredR);
1669       });
1670     return false;
1671   });
1672 }
1673 
1674 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1675 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
1676                               VPSlotTracker &SlotTracker) const {
1677   O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
1678 
1679   if (!getUnderlyingInstr()->getType()->isVoidTy()) {
1680     printAsOperand(O, SlotTracker);
1681     O << " = ";
1682   }
1683   if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
1684     O << "call";
1685     printFlags(O);
1686     O << "@" << CB->getCalledFunction()->getName() << "(";
1687     interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
1688                     O, [&O, &SlotTracker](VPValue *Op) {
1689                       Op->printAsOperand(O, SlotTracker);
1690                     });
1691     O << ")";
1692   } else {
1693     O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode());
1694     printFlags(O);
1695     printOperands(O, SlotTracker);
1696   }
1697 
1698   if (shouldPack())
1699     O << " (S->V)";
1700 }
1701 #endif
1702 
1703 /// Checks if \p C is uniform across all VFs and UFs. It is considered as such
1704 /// if it is either defined outside the vector region or its operand is known to
1705 /// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
1706 /// TODO: Uniformity should be associated with a VPValue and there should be a
1707 /// generic way to check.
1708 static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) {
1709   return C->isDefinedOutsideVectorRegions() ||
1710          isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
1711          isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
1712 }
1713 
1714 Value *VPScalarCastRecipe::generate(VPTransformState &State, unsigned Part) {
1715   assert(vputils::onlyFirstLaneUsed(this) &&
1716          "Codegen only implemented for first lane.");
1717   switch (Opcode) {
1718   case Instruction::SExt:
1719   case Instruction::ZExt:
1720   case Instruction::Trunc: {
1721     // Note: SExt/ZExt not used yet.
1722     Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
1723     return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
1724   }
1725   default:
1726     llvm_unreachable("opcode not implemented yet");
1727   }
1728 }
1729 
1730 void VPScalarCastRecipe::execute(VPTransformState &State) {
1731   bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
1732   for (unsigned Part = 0; Part != State.UF; ++Part) {
1733     Value *Res;
1734     // Only generate a single instance if the recipe is uniform across UFs
1735     // and VFs.
1736     if (Part > 0 && IsUniformAcrossVFsAndUFs)
1737       Res = State.get(this, VPIteration(0, 0));
1738     else
1739       Res = generate(State, Part);
1740     State.set(this, Res, VPIteration(Part, 0));
1741   }
1742 }
1743 
1744 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1745 void VPScalarCastRecipe::print(raw_ostream &O, const Twine &Indent,
1746                                VPSlotTracker &SlotTracker) const {
1747   O << Indent << "SCALAR-CAST ";
1748   printAsOperand(O, SlotTracker);
1749   O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1750   printOperands(O, SlotTracker);
1751   O << " to " << *ResultTy;
1752 }
1753 #endif
1754 
1755 void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
1756   assert(State.Instance && "Branch on Mask works only on single instance.");
1757 
1758   unsigned Part = State.Instance->Part;
1759   unsigned Lane = State.Instance->Lane.getKnownLane();
1760 
1761   Value *ConditionBit = nullptr;
1762   VPValue *BlockInMask = getMask();
1763   if (BlockInMask) {
1764     ConditionBit = State.get(BlockInMask, Part);
1765     if (ConditionBit->getType()->isVectorTy())
1766       ConditionBit = State.Builder.CreateExtractElement(
1767           ConditionBit, State.Builder.getInt32(Lane));
1768   } else // Block in mask is all-one.
1769     ConditionBit = State.Builder.getTrue();
1770 
1771   // Replace the temporary unreachable terminator with a new conditional branch,
1772   // whose two destinations will be set later when they are created.
1773   auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
1774   assert(isa<UnreachableInst>(CurrentTerminator) &&
1775          "Expected to replace unreachable terminator with conditional branch.");
1776   auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
1777   CondBr->setSuccessor(0, nullptr);
1778   ReplaceInstWithInst(CurrentTerminator, CondBr);
1779 }
1780 
1781 void VPPredInstPHIRecipe::execute(VPTransformState &State) {
1782   assert(State.Instance && "Predicated instruction PHI works per instance.");
1783   Instruction *ScalarPredInst =
1784       cast<Instruction>(State.get(getOperand(0), *State.Instance));
1785   BasicBlock *PredicatedBB = ScalarPredInst->getParent();
1786   BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
1787   assert(PredicatingBB && "Predicated block has no single predecessor.");
1788   assert(isa<VPReplicateRecipe>(getOperand(0)) &&
1789          "operand must be VPReplicateRecipe");
1790 
1791   // By current pack/unpack logic we need to generate only a single phi node: if
1792   // a vector value for the predicated instruction exists at this point it means
1793   // the instruction has vector users only, and a phi for the vector value is
1794   // needed. In this case the recipe of the predicated instruction is marked to
1795   // also do that packing, thereby "hoisting" the insert-element sequence.
1796   // Otherwise, a phi node for the scalar value is needed.
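  // For example (illustrative IR, hypothetical names), the vector case emits:
  //   %vphi = phi <4 x i32> [ %vec, %predicating ], [ %vec.insert, %predicated ]
  // where %vec.insert inserts the newly generated scalar element into %vec.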
1797   unsigned Part = State.Instance->Part;
1798   if (State.hasVectorValue(getOperand(0), Part)) {
1799     Value *VectorValue = State.get(getOperand(0), Part);
1800     InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
1801     PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
1802     VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
1803     VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
1804     if (State.hasVectorValue(this, Part))
1805       State.reset(this, VPhi, Part);
1806     else
1807       State.set(this, VPhi, Part);
1808     // NOTE: Currently we need to update the value of the operand, so the next
1809     // predicated iteration inserts its generated value in the correct vector.
1810     State.reset(getOperand(0), VPhi, Part);
1811   } else {
1812     Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
1813     PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
1814     Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
1815                      PredicatingBB);
1816     Phi->addIncoming(ScalarPredInst, PredicatedBB);
1817     if (State.hasScalarValue(this, *State.Instance))
1818       State.reset(this, Phi, *State.Instance);
1819     else
1820       State.set(this, Phi, *State.Instance);
1821     // NOTE: Currently we need to update the value of the operand, so the next
1822     // predicated iteration inserts its generated value in the correct vector.
1823     State.reset(getOperand(0), Phi, *State.Instance);
1824   }
1825 }
1826 
1827 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1828 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1829                                 VPSlotTracker &SlotTracker) const {
1830   O << Indent << "PHI-PREDICATED-INSTRUCTION ";
1831   printAsOperand(O, SlotTracker);
1832   O << " = ";
1833   printOperands(O, SlotTracker);
1834 }
1835 
1836 void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
1837                               VPSlotTracker &SlotTracker) const {
1838   O << Indent << "WIDEN ";
1839   printAsOperand(O, SlotTracker);
1840   O << " = load ";
1841   printOperands(O, SlotTracker);
1842 }
1843 
1844 void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
1845                                  VPSlotTracker &SlotTracker) const {
1846   O << Indent << "WIDEN ";
1847   printAsOperand(O, SlotTracker);
1848   O << " = vp.load ";
1849   printOperands(O, SlotTracker);
1850 }
1851 
1852 void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
1853                                VPSlotTracker &SlotTracker) const {
1854   O << Indent << "WIDEN store ";
1855   printOperands(O, SlotTracker);
1856 }
1857 
1858 void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent,
1859                                   VPSlotTracker &SlotTracker) const {
1860   O << Indent << "WIDEN vp.store ";
1861   printOperands(O, SlotTracker);
1862 }
1863 #endif
1864 
1865 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
1866   Value *Start = getStartValue()->getLiveInIRValue();
1867   PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
1868   EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1869 
1870   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1871   EntryPart->addIncoming(Start, VectorPH);
1872   EntryPart->setDebugLoc(getDebugLoc());
1873   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1874     State.set(this, EntryPart, Part, /*IsScalar*/ true);
1875 }
1876 
1877 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1878 void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1879                                    VPSlotTracker &SlotTracker) const {
1880   O << Indent << "EMIT ";
1881   printAsOperand(O, SlotTracker);
1882   O << " = CANONICAL-INDUCTION ";
1883   printOperands(O, SlotTracker);
1884 }
1885 #endif
1886 
1887 bool VPCanonicalIVPHIRecipe::isCanonical(
1888     InductionDescriptor::InductionKind Kind, VPValue *Start,
1889     VPValue *Step) const {
1890   // Must be an integer induction.
1891   if (Kind != InductionDescriptor::IK_IntInduction)
1892     return false;
1893   // Start must match the start value of this canonical induction.
1894   if (Start != getStartValue())
1895     return false;
1896 
1897   // If the step is defined by a recipe, it is not a ConstantInt.
1898   if (Step->getDefiningRecipe())
1899     return false;
1900 
1901   ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
1902   return StepC && StepC->isOne();
1903 }
1904 
1905 bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
1906   return IsScalarAfterVectorization &&
1907          (!IsScalable || vputils::onlyFirstLaneUsed(this));
1908 }
1909 
1910 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1911 void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
1912                                           VPSlotTracker &SlotTracker) const {
1913   O << Indent << "EMIT ";
1914   printAsOperand(O, SlotTracker);
1915   O << " = WIDEN-POINTER-INDUCTION ";
1916   getStartValue()->printAsOperand(O, SlotTracker);
1917   O << ", " << *IndDesc.getStep();
1918 }
1919 #endif
1920 
1921 void VPExpandSCEVRecipe::execute(VPTransformState &State) {
1922   assert(!State.Instance && "cannot be used in per-lane");
1923   const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
1924   SCEVExpander Exp(SE, DL, "induction");
1925 
1926   Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
1927                                  &*State.Builder.GetInsertPoint());
1928   assert(!State.ExpandedSCEVs.contains(Expr) &&
1929          "Same SCEV expanded multiple times");
1930   State.ExpandedSCEVs[Expr] = Res;
1931   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1932     State.set(this, Res, {Part, 0});
1933 }
1934 
1935 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1936 void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
1937                                VPSlotTracker &SlotTracker) const {
1938   O << Indent << "EMIT ";
1939   getVPSingleValue()->printAsOperand(O, SlotTracker);
1940   O << " = EXPAND SCEV " << *Expr;
1941 }
1942 #endif
1943 
1944 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
1945   Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
1946   Type *STy = CanonicalIV->getType();
1947   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
1948   ElementCount VF = State.VF;
1949   Value *VStart = VF.isScalar()
1950                       ? CanonicalIV
1951                       : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
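  // E.g. (illustrative) for fixed VF=4 and Part 1, VStep below becomes
  // splat(4) + <0, 1, 2, 3> = <4, 5, 6, 7>, and %vec.iv = VStart + VStep.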
1952   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1953     Value *VStep = createStepForVF(Builder, STy, VF, Part);
1954     if (VF.isVector()) {
1955       VStep = Builder.CreateVectorSplat(VF, VStep);
1956       VStep =
1957           Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
1958     }
1959     Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
1960     State.set(this, CanonicalVectorIV, Part);
1961   }
1962 }
1963 
1964 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1965 void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
1966                                      VPSlotTracker &SlotTracker) const {
1967   O << Indent << "EMIT ";
1968   printAsOperand(O, SlotTracker);
1969   O << " = WIDEN-CANONICAL-INDUCTION ";
1970   printOperands(O, SlotTracker);
1971 }
1972 #endif
1973 
1974 void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
1975   auto &Builder = State.Builder;
1976   // Create a vector from the initial value.
1977   auto *VectorInit = getStartValue()->getLiveInIRValue();
1978 
1979   Type *VecTy = State.VF.isScalar()
1980                     ? VectorInit->getType()
1981                     : VectorType::get(VectorInit->getType(), State.VF);
1982 
1983   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1984   if (State.VF.isVector()) {
1985     auto *IdxTy = Builder.getInt32Ty();
1986     auto *One = ConstantInt::get(IdxTy, 1);
1987     IRBuilder<>::InsertPointGuard Guard(Builder);
1988     Builder.SetInsertPoint(VectorPH->getTerminator());
1989     auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
1990     auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
1991     VectorInit = Builder.CreateInsertElement(
1992         PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
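    // E.g. (illustrative) for fixed VF=4, the scalar initial value lands in
    // the last lane: vector.recur.init = <poison, poison, poison, %init>.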
1993   }
1994 
1995   // Create a phi node for the new recurrence.
1996   PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
1997   EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1998   EntryPart->addIncoming(VectorInit, VectorPH);
1999   State.set(this, EntryPart, 0);
2000 }
2001 
2002 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2003 void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
2004                                             VPSlotTracker &SlotTracker) const {
2005   O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
2006   printAsOperand(O, SlotTracker);
2007   O << " = phi ";
2008   printOperands(O, SlotTracker);
2009 }
2010 #endif
2011 
2012 void VPReductionPHIRecipe::execute(VPTransformState &State) {
2013   auto &Builder = State.Builder;
2014 
2015   // Reductions do not have to start at zero. They can start with
2016   // any loop-invariant value.
2017   VPValue *StartVPV = getStartValue();
2018   Value *StartV = StartVPV->getLiveInIRValue();
2019 
2020   // In order to support recurrences we need to be able to vectorize Phi nodes.
2021   // Phi nodes have cycles, so we need to vectorize them in two stages. This is
2022   // stage #1: We create a new vector PHI node with no incoming edges. We'll use
2023   // this value when we vectorize all of the instructions that use the PHI.
2024   bool ScalarPHI = State.VF.isScalar() || IsInLoop;
2025   Type *VecTy = ScalarPHI ? StartV->getType()
2026                           : VectorType::get(StartV->getType(), State.VF);
2027 
2028   BasicBlock *HeaderBB = State.CFG.PrevBB;
2029   assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
2030          "recipe must be in the vector loop header");
2031   unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
2032   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
2033     Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
2034     EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
2035     State.set(this, EntryPart, Part, IsInLoop);
2036   }
2037 
2038   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2039 
2040   Value *Iden = nullptr;
2041   RecurKind RK = RdxDesc.getRecurrenceKind();
2042   if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
2043       RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
2044     // MinMax and AnyOf reductions have the start value as their identity.
2045     if (ScalarPHI) {
2046       Iden = StartV;
2047     } else {
2048       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
2049       Builder.SetInsertPoint(VectorPH->getTerminator());
2050       StartV = Iden =
2051           Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
2052     }
2053   } else {
2054     Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
2055                                          RdxDesc.getFastMathFlags());
2056 
2057     if (!ScalarPHI) {
2058       Iden = Builder.CreateVectorSplat(State.VF, Iden);
2059       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
2060       Builder.SetInsertPoint(VectorPH->getTerminator());
2061       Constant *Zero = Builder.getInt32(0);
2062       StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
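      // E.g. (illustrative) for an integer add reduction with fixed VF=4:
      // Iden = zeroinitializer and StartV = <%start, 0, 0, 0>.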
2063     }
2064   }
2065 
2066   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
2067     Value *EntryPart = State.get(this, Part, IsInLoop);
2068     // Make sure to add the reduction start value only to the
2069     // first unroll part.
2070     Value *StartVal = (Part == 0) ? StartV : Iden;
2071     cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
2072   }
2073 }
2074 
2075 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2076 void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
2077                                  VPSlotTracker &SlotTracker) const {
2078   O << Indent << "WIDEN-REDUCTION-PHI ";
2079 
2080   printAsOperand(O, SlotTracker);
2081   O << " = phi ";
2082   printOperands(O, SlotTracker);
2083 }
2084 #endif
2085 
2086 void VPWidenPHIRecipe::execute(VPTransformState &State) {
2087   assert(EnableVPlanNativePath &&
2088          "Non-native vplans are not expected to have VPWidenPHIRecipes.");
2089 
2090   Value *Op0 = State.get(getOperand(0), 0);
2091   Type *VecTy = Op0->getType();
2092   Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
2093   State.set(this, VecPhi, 0);
2094 }
2095 
2096 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2097 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
2098                              VPSlotTracker &SlotTracker) const {
2099   O << Indent << "WIDEN-PHI ";
2100 
2101   auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
2102   // Unless all incoming values are modeled in VPlan, print the original
2103   // PHI directly.
2104   // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
2105   // values as VPValues.
2106   if (getNumOperands() != OriginalPhi->getNumOperands()) {
2107     O << VPlanIngredient(OriginalPhi);
2108     return;
2109   }
2110 
2111   printAsOperand(O, SlotTracker);
2112   O << " = phi ";
2113   printOperands(O, SlotTracker);
2114 }
2115 #endif
2116 
2117 // TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2118 // remove VPActiveLaneMaskPHIRecipe.
2119 void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
2120   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2121   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
2122     Value *StartMask = State.get(getOperand(0), Part);
2123     PHINode *EntryPart =
2124         State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
2125     EntryPart->addIncoming(StartMask, VectorPH);
2126     EntryPart->setDebugLoc(getDebugLoc());
2127     State.set(this, EntryPart, Part);
2128   }
2129 }
2130 
2131 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2132 void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
2133                                       VPSlotTracker &SlotTracker) const {
2134   O << Indent << "ACTIVE-LANE-MASK-PHI ";
2135 
2136   printAsOperand(O, SlotTracker);
2137   O << " = phi ";
2138   printOperands(O, SlotTracker);
2139 }
2140 #endif
2141 
2142 void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) {
2143   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2144   assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization.");
2145   Value *Start = State.get(getOperand(0), VPIteration(0, 0));
2146   PHINode *EntryPart =
2147       State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
2148   EntryPart->addIncoming(Start, VectorPH);
2149   EntryPart->setDebugLoc(getDebugLoc());
2150   State.set(this, EntryPart, 0, /*IsScalar=*/true);
2151 }
2152 
2153 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2154 void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
2155                                   VPSlotTracker &SlotTracker) const {
2156   O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
2157 
2158   printAsOperand(O, SlotTracker);
2159   O << " = phi ";
2160   printOperands(O, SlotTracker);
2161 }
2162 #endif
2163