xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
181ad6265SDimitry Andric //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
281ad6265SDimitry Andric //
381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
681ad6265SDimitry Andric //
781ad6265SDimitry Andric //===----------------------------------------------------------------------===//
881ad6265SDimitry Andric ///
981ad6265SDimitry Andric /// \file
1081ad6265SDimitry Andric /// This file contains implementations for different VPlan recipes.
1181ad6265SDimitry Andric ///
1281ad6265SDimitry Andric //===----------------------------------------------------------------------===//
1381ad6265SDimitry Andric 
1481ad6265SDimitry Andric #include "VPlan.h"
155f757f3fSDimitry Andric #include "VPlanAnalysis.h"
1681ad6265SDimitry Andric #include "llvm/ADT/STLExtras.h"
1781ad6265SDimitry Andric #include "llvm/ADT/SmallVector.h"
1881ad6265SDimitry Andric #include "llvm/ADT/Twine.h"
1981ad6265SDimitry Andric #include "llvm/Analysis/IVDescriptors.h"
2081ad6265SDimitry Andric #include "llvm/IR/BasicBlock.h"
2181ad6265SDimitry Andric #include "llvm/IR/IRBuilder.h"
2281ad6265SDimitry Andric #include "llvm/IR/Instruction.h"
2381ad6265SDimitry Andric #include "llvm/IR/Instructions.h"
2481ad6265SDimitry Andric #include "llvm/IR/Type.h"
2581ad6265SDimitry Andric #include "llvm/IR/Value.h"
2681ad6265SDimitry Andric #include "llvm/Support/Casting.h"
2781ad6265SDimitry Andric #include "llvm/Support/CommandLine.h"
2881ad6265SDimitry Andric #include "llvm/Support/Debug.h"
2981ad6265SDimitry Andric #include "llvm/Support/raw_ostream.h"
30753f127fSDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
311db9f3b2SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
3281ad6265SDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
3381ad6265SDimitry Andric #include <cassert>
3481ad6265SDimitry Andric 
3581ad6265SDimitry Andric using namespace llvm;
3681ad6265SDimitry Andric 
// Convenience alias for a small vector of IR values. The second SmallVector
// template argument (2) is the number of elements stored inline.
37753f127fSDimitry Andric using VectorParts = SmallVector<Value *, 2>;
38753f127fSDimitry Andric 
// Command-line options that are only declared here (extern); their definitions
// are not part of this file. EnableVPlanNativePath is declared inside namespace
// llvm, while ForceTargetInstructionCost is declared at global scope.
// ForceTargetInstructionCost is read by VPRecipeBase::cost() below to override
// valid recipe costs when the option was given on the command line.
3906c3fb27SDimitry Andric namespace llvm {
4081ad6265SDimitry Andric extern cl::opt<bool> EnableVPlanNativePath;
4106c3fb27SDimitry Andric }
42*0fca6ea1SDimitry Andric extern cl::opt<unsigned> ForceTargetInstructionCost;
4381ad6265SDimitry Andric 
// Debug category for this file: DEBUG_TYPE expands to LV_NAME, i.e.
// "loop-vectorize". NOTE(review): presumably this is the category the
// LLVM_DEBUG output below is filtered by -- confirm against llvm/Support/Debug.h.
44753f127fSDimitry Andric #define LV_NAME "loop-vectorize"
45753f127fSDimitry Andric #define DEBUG_TYPE LV_NAME
46753f127fSDimitry Andric 
4781ad6265SDimitry Andric bool VPRecipeBase::mayWriteToMemory() const {
4881ad6265SDimitry Andric   switch (getVPDefID()) {
495f757f3fSDimitry Andric   case VPInterleaveSC:
505f757f3fSDimitry Andric     return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
51*0fca6ea1SDimitry Andric   case VPWidenStoreEVLSC:
52*0fca6ea1SDimitry Andric   case VPWidenStoreSC:
53*0fca6ea1SDimitry Andric     return true;
5481ad6265SDimitry Andric   case VPReplicateSC:
5581ad6265SDimitry Andric     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
5681ad6265SDimitry Andric         ->mayWriteToMemory();
57*0fca6ea1SDimitry Andric   case VPWidenCallSC:
58*0fca6ea1SDimitry Andric     return !cast<VPWidenCallRecipe>(this)
59*0fca6ea1SDimitry Andric                 ->getCalledScalarFunction()
60*0fca6ea1SDimitry Andric                 ->onlyReadsMemory();
6181ad6265SDimitry Andric   case VPBranchOnMaskSC:
62bdd1243dSDimitry Andric   case VPScalarIVStepsSC:
6306c3fb27SDimitry Andric   case VPPredInstPHISC:
6481ad6265SDimitry Andric     return false;
6581ad6265SDimitry Andric   case VPBlendSC:
66*0fca6ea1SDimitry Andric   case VPReductionEVLSC:
6781ad6265SDimitry Andric   case VPReductionSC:
6806c3fb27SDimitry Andric   case VPWidenCanonicalIVSC:
6906c3fb27SDimitry Andric   case VPWidenCastSC:
7006c3fb27SDimitry Andric   case VPWidenGEPSC:
7106c3fb27SDimitry Andric   case VPWidenIntOrFpInductionSC:
72*0fca6ea1SDimitry Andric   case VPWidenLoadEVLSC:
73*0fca6ea1SDimitry Andric   case VPWidenLoadSC:
7406c3fb27SDimitry Andric   case VPWidenPHISC:
7506c3fb27SDimitry Andric   case VPWidenSC:
7681ad6265SDimitry Andric   case VPWidenSelectSC: {
7781ad6265SDimitry Andric     const Instruction *I =
7881ad6265SDimitry Andric         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
7981ad6265SDimitry Andric     (void)I;
8081ad6265SDimitry Andric     assert((!I || !I->mayWriteToMemory()) &&
8181ad6265SDimitry Andric            "underlying instruction may write to memory");
8281ad6265SDimitry Andric     return false;
8381ad6265SDimitry Andric   }
8481ad6265SDimitry Andric   default:
8581ad6265SDimitry Andric     return true;
8681ad6265SDimitry Andric   }
8781ad6265SDimitry Andric }
8881ad6265SDimitry Andric 
8981ad6265SDimitry Andric bool VPRecipeBase::mayReadFromMemory() const {
9081ad6265SDimitry Andric   switch (getVPDefID()) {
91*0fca6ea1SDimitry Andric   case VPWidenLoadEVLSC:
92*0fca6ea1SDimitry Andric   case VPWidenLoadSC:
93*0fca6ea1SDimitry Andric     return true;
9481ad6265SDimitry Andric   case VPReplicateSC:
9581ad6265SDimitry Andric     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
9681ad6265SDimitry Andric         ->mayReadFromMemory();
97*0fca6ea1SDimitry Andric   case VPWidenCallSC:
98*0fca6ea1SDimitry Andric     return !cast<VPWidenCallRecipe>(this)
99*0fca6ea1SDimitry Andric                 ->getCalledScalarFunction()
100*0fca6ea1SDimitry Andric                 ->onlyWritesMemory();
10181ad6265SDimitry Andric   case VPBranchOnMaskSC:
10206c3fb27SDimitry Andric   case VPPredInstPHISC:
103*0fca6ea1SDimitry Andric   case VPScalarIVStepsSC:
104*0fca6ea1SDimitry Andric   case VPWidenStoreEVLSC:
105*0fca6ea1SDimitry Andric   case VPWidenStoreSC:
10681ad6265SDimitry Andric     return false;
10781ad6265SDimitry Andric   case VPBlendSC:
108*0fca6ea1SDimitry Andric   case VPReductionEVLSC:
10981ad6265SDimitry Andric   case VPReductionSC:
11006c3fb27SDimitry Andric   case VPWidenCanonicalIVSC:
11106c3fb27SDimitry Andric   case VPWidenCastSC:
11206c3fb27SDimitry Andric   case VPWidenGEPSC:
11306c3fb27SDimitry Andric   case VPWidenIntOrFpInductionSC:
11406c3fb27SDimitry Andric   case VPWidenPHISC:
11506c3fb27SDimitry Andric   case VPWidenSC:
11681ad6265SDimitry Andric   case VPWidenSelectSC: {
11781ad6265SDimitry Andric     const Instruction *I =
11881ad6265SDimitry Andric         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
11981ad6265SDimitry Andric     (void)I;
12081ad6265SDimitry Andric     assert((!I || !I->mayReadFromMemory()) &&
12181ad6265SDimitry Andric            "underlying instruction may read from memory");
12281ad6265SDimitry Andric     return false;
12381ad6265SDimitry Andric   }
12481ad6265SDimitry Andric   default:
12581ad6265SDimitry Andric     return true;
12681ad6265SDimitry Andric   }
12781ad6265SDimitry Andric }
12881ad6265SDimitry Andric 
12981ad6265SDimitry Andric bool VPRecipeBase::mayHaveSideEffects() const {
13081ad6265SDimitry Andric   switch (getVPDefID()) {
131bdd1243dSDimitry Andric   case VPDerivedIVSC:
132bdd1243dSDimitry Andric   case VPPredInstPHISC:
133*0fca6ea1SDimitry Andric   case VPScalarCastSC:
134bdd1243dSDimitry Andric     return false;
1355f757f3fSDimitry Andric   case VPInstructionSC:
1365f757f3fSDimitry Andric     switch (cast<VPInstruction>(this)->getOpcode()) {
1371db9f3b2SDimitry Andric     case Instruction::Or:
1385f757f3fSDimitry Andric     case Instruction::ICmp:
1391db9f3b2SDimitry Andric     case Instruction::Select:
1405f757f3fSDimitry Andric     case VPInstruction::Not:
1415f757f3fSDimitry Andric     case VPInstruction::CalculateTripCountMinusVF:
1425f757f3fSDimitry Andric     case VPInstruction::CanonicalIVIncrementForPart:
143*0fca6ea1SDimitry Andric     case VPInstruction::ExtractFromEnd:
144*0fca6ea1SDimitry Andric     case VPInstruction::FirstOrderRecurrenceSplice:
145*0fca6ea1SDimitry Andric     case VPInstruction::LogicalAnd:
146*0fca6ea1SDimitry Andric     case VPInstruction::PtrAdd:
1475f757f3fSDimitry Andric       return false;
1485f757f3fSDimitry Andric     default:
1495f757f3fSDimitry Andric       return true;
1505f757f3fSDimitry Andric     }
151*0fca6ea1SDimitry Andric   case VPWidenCallSC: {
152*0fca6ea1SDimitry Andric     Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
153*0fca6ea1SDimitry Andric     return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
154*0fca6ea1SDimitry Andric   }
15581ad6265SDimitry Andric   case VPBlendSC:
156*0fca6ea1SDimitry Andric   case VPReductionEVLSC:
15781ad6265SDimitry Andric   case VPReductionSC:
15806c3fb27SDimitry Andric   case VPScalarIVStepsSC:
15906c3fb27SDimitry Andric   case VPWidenCanonicalIVSC:
16006c3fb27SDimitry Andric   case VPWidenCastSC:
16106c3fb27SDimitry Andric   case VPWidenGEPSC:
16206c3fb27SDimitry Andric   case VPWidenIntOrFpInductionSC:
16306c3fb27SDimitry Andric   case VPWidenPHISC:
16406c3fb27SDimitry Andric   case VPWidenPointerInductionSC:
16506c3fb27SDimitry Andric   case VPWidenSC:
16606c3fb27SDimitry Andric   case VPWidenSelectSC: {
16781ad6265SDimitry Andric     const Instruction *I =
16881ad6265SDimitry Andric         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
16981ad6265SDimitry Andric     (void)I;
17081ad6265SDimitry Andric     assert((!I || !I->mayHaveSideEffects()) &&
17181ad6265SDimitry Andric            "underlying instruction has side-effects");
17281ad6265SDimitry Andric     return false;
17381ad6265SDimitry Andric   }
1745f757f3fSDimitry Andric   case VPInterleaveSC:
1755f757f3fSDimitry Andric     return mayWriteToMemory();
176*0fca6ea1SDimitry Andric   case VPWidenLoadEVLSC:
177*0fca6ea1SDimitry Andric   case VPWidenLoadSC:
178*0fca6ea1SDimitry Andric   case VPWidenStoreEVLSC:
179*0fca6ea1SDimitry Andric   case VPWidenStoreSC:
180*0fca6ea1SDimitry Andric     assert(
181*0fca6ea1SDimitry Andric         cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
182*0fca6ea1SDimitry Andric             mayWriteToMemory() &&
18306c3fb27SDimitry Andric         "mayHaveSideffects result for ingredient differs from this "
18406c3fb27SDimitry Andric         "implementation");
18506c3fb27SDimitry Andric     return mayWriteToMemory();
18681ad6265SDimitry Andric   case VPReplicateSC: {
18781ad6265SDimitry Andric     auto *R = cast<VPReplicateRecipe>(this);
18881ad6265SDimitry Andric     return R->getUnderlyingInstr()->mayHaveSideEffects();
18981ad6265SDimitry Andric   }
19081ad6265SDimitry Andric   default:
19181ad6265SDimitry Andric     return true;
19281ad6265SDimitry Andric   }
19381ad6265SDimitry Andric }
19481ad6265SDimitry Andric 
19581ad6265SDimitry Andric void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
19681ad6265SDimitry Andric   VPValue *ExitValue = getOperand(0);
197*0fca6ea1SDimitry Andric   auto Lane = vputils::isUniformAfterVectorization(ExitValue)
198*0fca6ea1SDimitry Andric                   ? VPLane::getFirstLane()
199*0fca6ea1SDimitry Andric                   : VPLane::getLastLaneForVF(State.VF);
2005f757f3fSDimitry Andric   VPBasicBlock *MiddleVPBB =
2015f757f3fSDimitry Andric       cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
202*0fca6ea1SDimitry Andric   VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
203*0fca6ea1SDimitry Andric   auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr;
204*0fca6ea1SDimitry Andric   // Values leaving the vector loop reach live out phi's in the exiting block
205*0fca6ea1SDimitry Andric   // via middle block.
206*0fca6ea1SDimitry Andric   auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion()
207*0fca6ea1SDimitry Andric                        ? MiddleVPBB
208*0fca6ea1SDimitry Andric                        : ExitingVPBB;
209*0fca6ea1SDimitry Andric   BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
210*0fca6ea1SDimitry Andric   // Set insertion point in PredBB in case an extract needs to be generated.
211*0fca6ea1SDimitry Andric   // TODO: Model extracts explicitly.
212*0fca6ea1SDimitry Andric   State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
213*0fca6ea1SDimitry Andric   Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane));
214*0fca6ea1SDimitry Andric   if (Phi->getBasicBlockIndex(PredBB) != -1)
215*0fca6ea1SDimitry Andric     Phi->setIncomingValueForBlock(PredBB, V);
216*0fca6ea1SDimitry Andric   else
217*0fca6ea1SDimitry Andric     Phi->addIncoming(V, PredBB);
21881ad6265SDimitry Andric }
21981ad6265SDimitry Andric 
22006c3fb27SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
22106c3fb27SDimitry Andric void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
22206c3fb27SDimitry Andric   O << "Live-out ";
22306c3fb27SDimitry Andric   getPhi()->printAsOperand(O);
22406c3fb27SDimitry Andric   O << " = ";
22506c3fb27SDimitry Andric   getOperand(0)->printAsOperand(O, SlotTracker);
22606c3fb27SDimitry Andric   O << "\n";
22706c3fb27SDimitry Andric }
22806c3fb27SDimitry Andric #endif
22906c3fb27SDimitry Andric 
23081ad6265SDimitry Andric void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
23181ad6265SDimitry Andric   assert(!Parent && "Recipe already in some VPBasicBlock");
23281ad6265SDimitry Andric   assert(InsertPos->getParent() &&
23381ad6265SDimitry Andric          "Insertion position not in any VPBasicBlock");
234*0fca6ea1SDimitry Andric   InsertPos->getParent()->insert(this, InsertPos->getIterator());
23581ad6265SDimitry Andric }
23681ad6265SDimitry Andric 
23781ad6265SDimitry Andric void VPRecipeBase::insertBefore(VPBasicBlock &BB,
23881ad6265SDimitry Andric                                 iplist<VPRecipeBase>::iterator I) {
23981ad6265SDimitry Andric   assert(!Parent && "Recipe already in some VPBasicBlock");
24081ad6265SDimitry Andric   assert(I == BB.end() || I->getParent() == &BB);
241*0fca6ea1SDimitry Andric   BB.insert(this, I);
24281ad6265SDimitry Andric }
24381ad6265SDimitry Andric 
24481ad6265SDimitry Andric void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
24581ad6265SDimitry Andric   assert(!Parent && "Recipe already in some VPBasicBlock");
24681ad6265SDimitry Andric   assert(InsertPos->getParent() &&
24781ad6265SDimitry Andric          "Insertion position not in any VPBasicBlock");
248*0fca6ea1SDimitry Andric   InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
24981ad6265SDimitry Andric }
25081ad6265SDimitry Andric 
25181ad6265SDimitry Andric void VPRecipeBase::removeFromParent() {
25281ad6265SDimitry Andric   assert(getParent() && "Recipe not in any VPBasicBlock");
25381ad6265SDimitry Andric   getParent()->getRecipeList().remove(getIterator());
25481ad6265SDimitry Andric   Parent = nullptr;
25581ad6265SDimitry Andric }
25681ad6265SDimitry Andric 
25781ad6265SDimitry Andric iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
25881ad6265SDimitry Andric   assert(getParent() && "Recipe not in any VPBasicBlock");
25981ad6265SDimitry Andric   return getParent()->getRecipeList().erase(getIterator());
26081ad6265SDimitry Andric }
26181ad6265SDimitry Andric 
26281ad6265SDimitry Andric void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
26381ad6265SDimitry Andric   removeFromParent();
26481ad6265SDimitry Andric   insertAfter(InsertPos);
26581ad6265SDimitry Andric }
26681ad6265SDimitry Andric 
26781ad6265SDimitry Andric void VPRecipeBase::moveBefore(VPBasicBlock &BB,
26881ad6265SDimitry Andric                               iplist<VPRecipeBase>::iterator I) {
26981ad6265SDimitry Andric   removeFromParent();
27081ad6265SDimitry Andric   insertBefore(BB, I);
27181ad6265SDimitry Andric }
27281ad6265SDimitry Andric 
273*0fca6ea1SDimitry Andric /// Return the underlying instruction to be used for computing \p R's cost via
274*0fca6ea1SDimitry Andric /// the legacy cost model. Return nullptr if there's no suitable instruction.
275*0fca6ea1SDimitry Andric static Instruction *getInstructionForCost(const VPRecipeBase *R) {
276*0fca6ea1SDimitry Andric   if (auto *S = dyn_cast<VPSingleDefRecipe>(R))
277*0fca6ea1SDimitry Andric     return dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
278*0fca6ea1SDimitry Andric   if (auto *IG = dyn_cast<VPInterleaveRecipe>(R))
279*0fca6ea1SDimitry Andric     return IG->getInsertPos();
280*0fca6ea1SDimitry Andric   if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(R))
281*0fca6ea1SDimitry Andric     return &WidenMem->getIngredient();
282*0fca6ea1SDimitry Andric   return nullptr;
283*0fca6ea1SDimitry Andric }
284*0fca6ea1SDimitry Andric 
285*0fca6ea1SDimitry Andric InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
286*0fca6ea1SDimitry Andric   if (auto *UI = getInstructionForCost(this))
287*0fca6ea1SDimitry Andric     if (Ctx.skipCostComputation(UI, VF.isVector()))
288*0fca6ea1SDimitry Andric       return 0;
289*0fca6ea1SDimitry Andric 
290*0fca6ea1SDimitry Andric   InstructionCost RecipeCost = computeCost(VF, Ctx);
291*0fca6ea1SDimitry Andric   if (ForceTargetInstructionCost.getNumOccurrences() > 0 &&
292*0fca6ea1SDimitry Andric       RecipeCost.isValid())
293*0fca6ea1SDimitry Andric     RecipeCost = InstructionCost(ForceTargetInstructionCost);
294*0fca6ea1SDimitry Andric 
295*0fca6ea1SDimitry Andric   LLVM_DEBUG({
296*0fca6ea1SDimitry Andric     dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
297*0fca6ea1SDimitry Andric     dump();
298*0fca6ea1SDimitry Andric   });
299*0fca6ea1SDimitry Andric   return RecipeCost;
300*0fca6ea1SDimitry Andric }
301*0fca6ea1SDimitry Andric 
302*0fca6ea1SDimitry Andric InstructionCost VPRecipeBase::computeCost(ElementCount VF,
303*0fca6ea1SDimitry Andric                                           VPCostContext &Ctx) const {
304*0fca6ea1SDimitry Andric   // Compute the cost for the recipe falling back to the legacy cost model using
305*0fca6ea1SDimitry Andric   // the underlying instruction. If there is no underlying instruction, returns
306*0fca6ea1SDimitry Andric   // 0.
307*0fca6ea1SDimitry Andric   Instruction *UI = getInstructionForCost(this);
308*0fca6ea1SDimitry Andric   if (UI && isa<VPReplicateRecipe>(this)) {
309*0fca6ea1SDimitry Andric     // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
310*0fca6ea1SDimitry Andric     // transform, avoid computing their cost multiple times for now.
311*0fca6ea1SDimitry Andric     Ctx.SkipCostComputation.insert(UI);
312*0fca6ea1SDimitry Andric   }
313*0fca6ea1SDimitry Andric   return UI ? Ctx.getLegacyCost(UI, VF) : 0;
314*0fca6ea1SDimitry Andric }
315*0fca6ea1SDimitry Andric 
3165f757f3fSDimitry Andric FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
3175f757f3fSDimitry Andric   assert(OpType == OperationType::FPMathOp &&
3185f757f3fSDimitry Andric          "recipe doesn't have fast math flags");
3195f757f3fSDimitry Andric   FastMathFlags Res;
3205f757f3fSDimitry Andric   Res.setAllowReassoc(FMFs.AllowReassoc);
3215f757f3fSDimitry Andric   Res.setNoNaNs(FMFs.NoNaNs);
3225f757f3fSDimitry Andric   Res.setNoInfs(FMFs.NoInfs);
3235f757f3fSDimitry Andric   Res.setNoSignedZeros(FMFs.NoSignedZeros);
3245f757f3fSDimitry Andric   Res.setAllowReciprocal(FMFs.AllowReciprocal);
3255f757f3fSDimitry Andric   Res.setAllowContract(FMFs.AllowContract);
3265f757f3fSDimitry Andric   Res.setApproxFunc(FMFs.ApproxFunc);
3275f757f3fSDimitry Andric   return Res;
3285f757f3fSDimitry Andric }
3295f757f3fSDimitry Andric 
3305f757f3fSDimitry Andric VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
3315f757f3fSDimitry Andric                              VPValue *A, VPValue *B, DebugLoc DL,
3325f757f3fSDimitry Andric                              const Twine &Name)
3335f757f3fSDimitry Andric     : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
3345f757f3fSDimitry Andric                           Pred, DL),
3357a6dacacSDimitry Andric       Opcode(Opcode), Name(Name.str()) {
3365f757f3fSDimitry Andric   assert(Opcode == Instruction::ICmp &&
3375f757f3fSDimitry Andric          "only ICmp predicates supported at the moment");
3385f757f3fSDimitry Andric }
3395f757f3fSDimitry Andric 
3405f757f3fSDimitry Andric VPInstruction::VPInstruction(unsigned Opcode,
3415f757f3fSDimitry Andric                              std::initializer_list<VPValue *> Operands,
3425f757f3fSDimitry Andric                              FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
3435f757f3fSDimitry Andric     : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
3447a6dacacSDimitry Andric       Opcode(Opcode), Name(Name.str()) {
3455f757f3fSDimitry Andric   // Make sure the VPInstruction is a floating-point operation.
3465f757f3fSDimitry Andric   assert(isFPMathOp() && "this op can't take fast-math flags");
3475f757f3fSDimitry Andric }
3485f757f3fSDimitry Andric 
349*0fca6ea1SDimitry Andric bool VPInstruction::doesGeneratePerAllLanes() const {
350*0fca6ea1SDimitry Andric   return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
351*0fca6ea1SDimitry Andric }
352*0fca6ea1SDimitry Andric 
353*0fca6ea1SDimitry Andric bool VPInstruction::canGenerateScalarForFirstLane() const {
354*0fca6ea1SDimitry Andric   if (Instruction::isBinaryOp(getOpcode()))
355*0fca6ea1SDimitry Andric     return true;
356*0fca6ea1SDimitry Andric   if (isSingleScalar() || isVectorToScalar())
357*0fca6ea1SDimitry Andric     return true;
358*0fca6ea1SDimitry Andric   switch (Opcode) {
359*0fca6ea1SDimitry Andric   case Instruction::ICmp:
360*0fca6ea1SDimitry Andric   case VPInstruction::BranchOnCond:
361*0fca6ea1SDimitry Andric   case VPInstruction::BranchOnCount:
362*0fca6ea1SDimitry Andric   case VPInstruction::CalculateTripCountMinusVF:
363*0fca6ea1SDimitry Andric   case VPInstruction::CanonicalIVIncrementForPart:
364*0fca6ea1SDimitry Andric   case VPInstruction::PtrAdd:
365*0fca6ea1SDimitry Andric   case VPInstruction::ExplicitVectorLength:
366*0fca6ea1SDimitry Andric     return true;
367*0fca6ea1SDimitry Andric   default:
368*0fca6ea1SDimitry Andric     return false;
369*0fca6ea1SDimitry Andric   }
370*0fca6ea1SDimitry Andric }
371*0fca6ea1SDimitry Andric 
372*0fca6ea1SDimitry Andric Value *VPInstruction::generatePerLane(VPTransformState &State,
373*0fca6ea1SDimitry Andric                                       const VPIteration &Lane) {
37481ad6265SDimitry Andric   IRBuilderBase &Builder = State.Builder;
375*0fca6ea1SDimitry Andric 
376*0fca6ea1SDimitry Andric   assert(getOpcode() == VPInstruction::PtrAdd &&
377*0fca6ea1SDimitry Andric          "only PtrAdd opcodes are supported for now");
378*0fca6ea1SDimitry Andric   return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
379*0fca6ea1SDimitry Andric                               State.get(getOperand(1), Lane), Name);
380*0fca6ea1SDimitry Andric }
381*0fca6ea1SDimitry Andric 
382*0fca6ea1SDimitry Andric Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
383*0fca6ea1SDimitry Andric   IRBuilderBase &Builder = State.Builder;
38481ad6265SDimitry Andric 
38581ad6265SDimitry Andric   if (Instruction::isBinaryOp(getOpcode())) {
386*0fca6ea1SDimitry Andric     bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
387*0fca6ea1SDimitry Andric     Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
388*0fca6ea1SDimitry Andric     Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
3895f757f3fSDimitry Andric     auto *Res =
3905f757f3fSDimitry Andric         Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
3915f757f3fSDimitry Andric     if (auto *I = dyn_cast<Instruction>(Res))
3925f757f3fSDimitry Andric       setFlags(I);
3935f757f3fSDimitry Andric     return Res;
39481ad6265SDimitry Andric   }
39581ad6265SDimitry Andric 
39681ad6265SDimitry Andric   switch (getOpcode()) {
39781ad6265SDimitry Andric   case VPInstruction::Not: {
39881ad6265SDimitry Andric     Value *A = State.get(getOperand(0), Part);
39906c3fb27SDimitry Andric     return Builder.CreateNot(A, Name);
40081ad6265SDimitry Andric   }
4015f757f3fSDimitry Andric   case Instruction::ICmp: {
402*0fca6ea1SDimitry Andric     bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
403*0fca6ea1SDimitry Andric     Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
404*0fca6ea1SDimitry Andric     Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
4055f757f3fSDimitry Andric     return Builder.CreateCmp(getPredicate(), A, B, Name);
40681ad6265SDimitry Andric   }
40781ad6265SDimitry Andric   case Instruction::Select: {
40881ad6265SDimitry Andric     Value *Cond = State.get(getOperand(0), Part);
40981ad6265SDimitry Andric     Value *Op1 = State.get(getOperand(1), Part);
41081ad6265SDimitry Andric     Value *Op2 = State.get(getOperand(2), Part);
41106c3fb27SDimitry Andric     return Builder.CreateSelect(Cond, Op1, Op2, Name);
41281ad6265SDimitry Andric   }
41381ad6265SDimitry Andric   case VPInstruction::ActiveLaneMask: {
41481ad6265SDimitry Andric     // Get first lane of vector induction variable.
41581ad6265SDimitry Andric     Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
41681ad6265SDimitry Andric     // Get the original loop tripcount.
41706c3fb27SDimitry Andric     Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));
41881ad6265SDimitry Andric 
419*0fca6ea1SDimitry Andric     // If this part of the active lane mask is scalar, generate the CMP directly
420*0fca6ea1SDimitry Andric     // to avoid unnecessary extracts.
421*0fca6ea1SDimitry Andric     if (State.VF.isScalar())
422*0fca6ea1SDimitry Andric       return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
423*0fca6ea1SDimitry Andric                                Name);
424*0fca6ea1SDimitry Andric 
42581ad6265SDimitry Andric     auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
42681ad6265SDimitry Andric     auto *PredTy = VectorType::get(Int1Ty, State.VF);
42706c3fb27SDimitry Andric     return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
42806c3fb27SDimitry Andric                                    {PredTy, ScalarTC->getType()},
429753f127fSDimitry Andric                                    {VIVElem0, ScalarTC}, nullptr, Name);
43081ad6265SDimitry Andric   }
43181ad6265SDimitry Andric   case VPInstruction::FirstOrderRecurrenceSplice: {
43281ad6265SDimitry Andric     // Generate code to combine the previous and current values in vector v3.
43381ad6265SDimitry Andric     //
43481ad6265SDimitry Andric     //   vector.ph:
43581ad6265SDimitry Andric     //     v_init = vector(..., ..., ..., a[-1])
43681ad6265SDimitry Andric     //     br vector.body
43781ad6265SDimitry Andric     //
43881ad6265SDimitry Andric     //   vector.body
43981ad6265SDimitry Andric     //     i = phi [0, vector.ph], [i+4, vector.body]
44081ad6265SDimitry Andric     //     v1 = phi [v_init, vector.ph], [v2, vector.body]
44181ad6265SDimitry Andric     //     v2 = a[i, i+1, i+2, i+3];
44281ad6265SDimitry Andric     //     v3 = vector(v1(3), v2(0, 1, 2))
44381ad6265SDimitry Andric 
44481ad6265SDimitry Andric     // For the first part, use the recurrence phi (v1), otherwise v2.
44581ad6265SDimitry Andric     auto *V1 = State.get(getOperand(0), 0);
44681ad6265SDimitry Andric     Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
44706c3fb27SDimitry Andric     if (!PartMinus1->getType()->isVectorTy())
44806c3fb27SDimitry Andric       return PartMinus1;
44981ad6265SDimitry Andric     Value *V2 = State.get(getOperand(1), Part);
45006c3fb27SDimitry Andric     return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
45181ad6265SDimitry Andric   }
45206c3fb27SDimitry Andric   case VPInstruction::CalculateTripCountMinusVF: {
453*0fca6ea1SDimitry Andric     if (Part != 0)
454*0fca6ea1SDimitry Andric       return State.get(this, 0, /*IsScalar*/ true);
455*0fca6ea1SDimitry Andric 
45606c3fb27SDimitry Andric     Value *ScalarTC = State.get(getOperand(0), {0, 0});
45706c3fb27SDimitry Andric     Value *Step =
45806c3fb27SDimitry Andric         createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
45906c3fb27SDimitry Andric     Value *Sub = Builder.CreateSub(ScalarTC, Step);
46006c3fb27SDimitry Andric     Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
46106c3fb27SDimitry Andric     Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
46206c3fb27SDimitry Andric     return Builder.CreateSelect(Cmp, Sub, Zero);
46381ad6265SDimitry Andric   }
464*0fca6ea1SDimitry Andric   case VPInstruction::ExplicitVectorLength: {
465*0fca6ea1SDimitry Andric     // Compute EVL
466*0fca6ea1SDimitry Andric     auto GetEVL = [=](VPTransformState &State, Value *AVL) {
467*0fca6ea1SDimitry Andric       assert(AVL->getType()->isIntegerTy() &&
468*0fca6ea1SDimitry Andric              "Requested vector length should be an integer.");
469*0fca6ea1SDimitry Andric 
470*0fca6ea1SDimitry Andric       // TODO: Add support for MaxSafeDist for correct loop emission.
471*0fca6ea1SDimitry Andric       assert(State.VF.isScalable() && "Expected scalable vector factor.");
472*0fca6ea1SDimitry Andric       Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
473*0fca6ea1SDimitry Andric 
474*0fca6ea1SDimitry Andric       Value *EVL = State.Builder.CreateIntrinsic(
475*0fca6ea1SDimitry Andric           State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
476*0fca6ea1SDimitry Andric           {AVL, VFArg, State.Builder.getTrue()});
477*0fca6ea1SDimitry Andric       return EVL;
478*0fca6ea1SDimitry Andric     };
479*0fca6ea1SDimitry Andric     // TODO: Restructure this code with an explicit remainder loop, vsetvli can
480*0fca6ea1SDimitry Andric     // be outside of the main loop.
481*0fca6ea1SDimitry Andric     assert(Part == 0 && "No unrolling expected for predicated vectorization.");
482*0fca6ea1SDimitry Andric     // Compute VTC - IV as the AVL (requested vector length).
483*0fca6ea1SDimitry Andric     Value *Index = State.get(getOperand(0), VPIteration(0, 0));
484*0fca6ea1SDimitry Andric     Value *TripCount = State.get(getOperand(1), VPIteration(0, 0));
485*0fca6ea1SDimitry Andric     Value *AVL = State.Builder.CreateSub(TripCount, Index);
486*0fca6ea1SDimitry Andric     Value *EVL = GetEVL(State, AVL);
487*0fca6ea1SDimitry Andric     return EVL;
488*0fca6ea1SDimitry Andric   }
4895f757f3fSDimitry Andric   case VPInstruction::CanonicalIVIncrementForPart: {
490753f127fSDimitry Andric     auto *IV = State.get(getOperand(0), VPIteration(0, 0));
49106c3fb27SDimitry Andric     if (Part == 0)
49206c3fb27SDimitry Andric       return IV;
493753f127fSDimitry Andric 
494753f127fSDimitry Andric     // The canonical IV is incremented by the vectorization factor (num of SIMD
495753f127fSDimitry Andric     // elements) times the unroll part.
496753f127fSDimitry Andric     Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
4975f757f3fSDimitry Andric     return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
4985f757f3fSDimitry Andric                              hasNoSignedWrap());
499753f127fSDimitry Andric   }
50081ad6265SDimitry Andric   case VPInstruction::BranchOnCond: {
50181ad6265SDimitry Andric     if (Part != 0)
50206c3fb27SDimitry Andric       return nullptr;
50381ad6265SDimitry Andric 
50481ad6265SDimitry Andric     Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
50581ad6265SDimitry Andric     // Replace the temporary unreachable terminator with a new conditional
50681ad6265SDimitry Andric     // branch, hooking it up to backward destination for exiting blocks now and
50781ad6265SDimitry Andric     // to forward destination(s) later when they are created.
50881ad6265SDimitry Andric     BranchInst *CondBr =
50981ad6265SDimitry Andric         Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
51081ad6265SDimitry Andric     CondBr->setSuccessor(0, nullptr);
51181ad6265SDimitry Andric     Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
512*0fca6ea1SDimitry Andric 
513*0fca6ea1SDimitry Andric     if (!getParent()->isExiting())
514*0fca6ea1SDimitry Andric       return CondBr;
515*0fca6ea1SDimitry Andric 
516*0fca6ea1SDimitry Andric     VPRegionBlock *ParentRegion = getParent()->getParent();
517*0fca6ea1SDimitry Andric     VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
518*0fca6ea1SDimitry Andric     CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
51906c3fb27SDimitry Andric     return CondBr;
52081ad6265SDimitry Andric   }
52181ad6265SDimitry Andric   case VPInstruction::BranchOnCount: {
52281ad6265SDimitry Andric     if (Part != 0)
52306c3fb27SDimitry Andric       return nullptr;
52481ad6265SDimitry Andric     // First create the compare.
525*0fca6ea1SDimitry Andric     Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
526*0fca6ea1SDimitry Andric     Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
52781ad6265SDimitry Andric     Value *Cond = Builder.CreateICmpEQ(IV, TC);
52881ad6265SDimitry Andric 
52981ad6265SDimitry Andric     // Now create the branch.
53081ad6265SDimitry Andric     auto *Plan = getParent()->getPlan();
53181ad6265SDimitry Andric     VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
53281ad6265SDimitry Andric     VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
53381ad6265SDimitry Andric 
53481ad6265SDimitry Andric     // Replace the temporary unreachable terminator with a new conditional
53581ad6265SDimitry Andric     // branch, hooking it up to backward destination (the header) now and to the
53681ad6265SDimitry Andric     // forward destination (the exit/middle block) later when it is created.
53781ad6265SDimitry Andric     // Note that CreateCondBr expects a valid BB as first argument, so we need
53881ad6265SDimitry Andric     // to set it to nullptr later.
53981ad6265SDimitry Andric     BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
54081ad6265SDimitry Andric                                               State.CFG.VPBB2IRBB[Header]);
54181ad6265SDimitry Andric     CondBr->setSuccessor(0, nullptr);
54281ad6265SDimitry Andric     Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
54306c3fb27SDimitry Andric     return CondBr;
54481ad6265SDimitry Andric   }
5451db9f3b2SDimitry Andric   case VPInstruction::ComputeReductionResult: {
5461db9f3b2SDimitry Andric     if (Part != 0)
547*0fca6ea1SDimitry Andric       return State.get(this, 0, /*IsScalar*/ true);
5481db9f3b2SDimitry Andric 
5491db9f3b2SDimitry Andric     // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
5501db9f3b2SDimitry Andric     // and will be removed by breaking up the recipe further.
5511db9f3b2SDimitry Andric     auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
5521db9f3b2SDimitry Andric     auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
5531db9f3b2SDimitry Andric     // Get its reduction variable descriptor.
5541db9f3b2SDimitry Andric     const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
5551db9f3b2SDimitry Andric 
5561db9f3b2SDimitry Andric     RecurKind RK = RdxDesc.getRecurrenceKind();
5571db9f3b2SDimitry Andric 
5581db9f3b2SDimitry Andric     VPValue *LoopExitingDef = getOperand(1);
5591db9f3b2SDimitry Andric     Type *PhiTy = OrigPhi->getType();
5601db9f3b2SDimitry Andric     VectorParts RdxParts(State.UF);
5611db9f3b2SDimitry Andric     for (unsigned Part = 0; Part < State.UF; ++Part)
562*0fca6ea1SDimitry Andric       RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());
5631db9f3b2SDimitry Andric 
5641db9f3b2SDimitry Andric     // If the vector reduction can be performed in a smaller type, we truncate
5651db9f3b2SDimitry Andric     // then extend the loop exit value to enable InstCombine to evaluate the
5661db9f3b2SDimitry Andric     // entire expression in the smaller type.
5671db9f3b2SDimitry Andric     // TODO: Handle this in truncateToMinBW.
5681db9f3b2SDimitry Andric     if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
5691db9f3b2SDimitry Andric       Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
5701db9f3b2SDimitry Andric       for (unsigned Part = 0; Part < State.UF; ++Part)
5711db9f3b2SDimitry Andric         RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
5721db9f3b2SDimitry Andric     }
5731db9f3b2SDimitry Andric     // Reduce all of the unrolled parts into a single vector.
5741db9f3b2SDimitry Andric     Value *ReducedPartRdx = RdxParts[0];
5751db9f3b2SDimitry Andric     unsigned Op = RecurrenceDescriptor::getOpcode(RK);
576*0fca6ea1SDimitry Andric     if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
577*0fca6ea1SDimitry Andric       Op = Instruction::Or;
5781db9f3b2SDimitry Andric 
5791db9f3b2SDimitry Andric     if (PhiR->isOrdered()) {
5801db9f3b2SDimitry Andric       ReducedPartRdx = RdxParts[State.UF - 1];
5811db9f3b2SDimitry Andric     } else {
5821db9f3b2SDimitry Andric       // Floating-point operations should have some FMF to enable the reduction.
5831db9f3b2SDimitry Andric       IRBuilderBase::FastMathFlagGuard FMFG(Builder);
5841db9f3b2SDimitry Andric       Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
5851db9f3b2SDimitry Andric       for (unsigned Part = 1; Part < State.UF; ++Part) {
5861db9f3b2SDimitry Andric         Value *RdxPart = RdxParts[Part];
5871db9f3b2SDimitry Andric         if (Op != Instruction::ICmp && Op != Instruction::FCmp)
5881db9f3b2SDimitry Andric           ReducedPartRdx = Builder.CreateBinOp(
5891db9f3b2SDimitry Andric               (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
590*0fca6ea1SDimitry Andric         else
5911db9f3b2SDimitry Andric           ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
5921db9f3b2SDimitry Andric       }
5931db9f3b2SDimitry Andric     }
5941db9f3b2SDimitry Andric 
5951db9f3b2SDimitry Andric     // Create the reduction after the loop. Note that inloop reductions create
5961db9f3b2SDimitry Andric     // the target reduction in the loop using a Reduction recipe.
597*0fca6ea1SDimitry Andric     if ((State.VF.isVector() ||
598*0fca6ea1SDimitry Andric          RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
599*0fca6ea1SDimitry Andric         !PhiR->isInLoop()) {
6001db9f3b2SDimitry Andric       ReducedPartRdx =
6011db9f3b2SDimitry Andric           createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
6021db9f3b2SDimitry Andric       // If the reduction can be performed in a smaller type, we need to extend
6031db9f3b2SDimitry Andric       // the reduction to the wider type before we branch to the original loop.
6041db9f3b2SDimitry Andric       if (PhiTy != RdxDesc.getRecurrenceType())
6051db9f3b2SDimitry Andric         ReducedPartRdx = RdxDesc.isSigned()
6061db9f3b2SDimitry Andric                              ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
6071db9f3b2SDimitry Andric                              : Builder.CreateZExt(ReducedPartRdx, PhiTy);
6081db9f3b2SDimitry Andric     }
6091db9f3b2SDimitry Andric 
6101db9f3b2SDimitry Andric     // If there were stores of the reduction value to a uniform memory address
6111db9f3b2SDimitry Andric     // inside the loop, create the final store here.
6121db9f3b2SDimitry Andric     if (StoreInst *SI = RdxDesc.IntermediateStore) {
6131db9f3b2SDimitry Andric       auto *NewSI = Builder.CreateAlignedStore(
6141db9f3b2SDimitry Andric           ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
6151db9f3b2SDimitry Andric       propagateMetadata(NewSI, SI);
6161db9f3b2SDimitry Andric     }
6171db9f3b2SDimitry Andric 
6181db9f3b2SDimitry Andric     return ReducedPartRdx;
6191db9f3b2SDimitry Andric   }
620*0fca6ea1SDimitry Andric   case VPInstruction::ExtractFromEnd: {
621*0fca6ea1SDimitry Andric     if (Part != 0)
622*0fca6ea1SDimitry Andric       return State.get(this, 0, /*IsScalar*/ true);
623*0fca6ea1SDimitry Andric 
624*0fca6ea1SDimitry Andric     auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
625*0fca6ea1SDimitry Andric     unsigned Offset = CI->getZExtValue();
626*0fca6ea1SDimitry Andric     assert(Offset > 0 && "Offset from end must be positive");
627*0fca6ea1SDimitry Andric     Value *Res;
628*0fca6ea1SDimitry Andric     if (State.VF.isVector()) {
629*0fca6ea1SDimitry Andric       assert(Offset <= State.VF.getKnownMinValue() &&
630*0fca6ea1SDimitry Andric              "invalid offset to extract from");
631*0fca6ea1SDimitry Andric       // Extract lane VF - Offset from the operand.
632*0fca6ea1SDimitry Andric       Res = State.get(
633*0fca6ea1SDimitry Andric           getOperand(0),
634*0fca6ea1SDimitry Andric           VPIteration(State.UF - 1, VPLane::getLaneFromEnd(State.VF, Offset)));
635*0fca6ea1SDimitry Andric     } else {
636*0fca6ea1SDimitry Andric       assert(Offset <= State.UF && "invalid offset to extract from");
637*0fca6ea1SDimitry Andric       // When loop is unrolled without vectorizing, retrieve UF - Offset.
638*0fca6ea1SDimitry Andric       Res = State.get(getOperand(0), State.UF - Offset);
639*0fca6ea1SDimitry Andric     }
640*0fca6ea1SDimitry Andric     if (isa<ExtractElementInst>(Res))
641*0fca6ea1SDimitry Andric       Res->setName(Name);
642*0fca6ea1SDimitry Andric     return Res;
643*0fca6ea1SDimitry Andric   }
644*0fca6ea1SDimitry Andric   case VPInstruction::LogicalAnd: {
645*0fca6ea1SDimitry Andric     Value *A = State.get(getOperand(0), Part);
646*0fca6ea1SDimitry Andric     Value *B = State.get(getOperand(1), Part);
647*0fca6ea1SDimitry Andric     return Builder.CreateLogicalAnd(A, B, Name);
648*0fca6ea1SDimitry Andric   }
649*0fca6ea1SDimitry Andric   case VPInstruction::PtrAdd: {
650*0fca6ea1SDimitry Andric     assert(vputils::onlyFirstLaneUsed(this) &&
651*0fca6ea1SDimitry Andric            "can only generate first lane for PtrAdd");
652*0fca6ea1SDimitry Andric     Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
653*0fca6ea1SDimitry Andric     Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
654*0fca6ea1SDimitry Andric     return Builder.CreatePtrAdd(Ptr, Addend, Name);
655*0fca6ea1SDimitry Andric   }
656*0fca6ea1SDimitry Andric   case VPInstruction::ResumePhi: {
657*0fca6ea1SDimitry Andric     if (Part != 0)
658*0fca6ea1SDimitry Andric       return State.get(this, 0, /*IsScalar*/ true);
659*0fca6ea1SDimitry Andric     Value *IncomingFromVPlanPred =
660*0fca6ea1SDimitry Andric         State.get(getOperand(0), Part, /* IsScalar */ true);
661*0fca6ea1SDimitry Andric     Value *IncomingFromOtherPreds =
662*0fca6ea1SDimitry Andric         State.get(getOperand(1), Part, /* IsScalar */ true);
663*0fca6ea1SDimitry Andric     auto *NewPhi =
664*0fca6ea1SDimitry Andric         Builder.CreatePHI(IncomingFromOtherPreds->getType(), 2, Name);
665*0fca6ea1SDimitry Andric     BasicBlock *VPlanPred =
666*0fca6ea1SDimitry Andric         State.CFG
667*0fca6ea1SDimitry Andric             .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())];
668*0fca6ea1SDimitry Andric     NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
669*0fca6ea1SDimitry Andric     for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
670*0fca6ea1SDimitry Andric       assert(OtherPred != VPlanPred &&
671*0fca6ea1SDimitry Andric              "VPlan predecessors should not be connected yet");
672*0fca6ea1SDimitry Andric       NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
673*0fca6ea1SDimitry Andric     }
674*0fca6ea1SDimitry Andric     return NewPhi;
675*0fca6ea1SDimitry Andric   }
676*0fca6ea1SDimitry Andric 
67781ad6265SDimitry Andric   default:
67881ad6265SDimitry Andric     llvm_unreachable("Unsupported opcode for instruction");
67981ad6265SDimitry Andric   }
68081ad6265SDimitry Andric }
68181ad6265SDimitry Andric 
682*0fca6ea1SDimitry Andric bool VPInstruction::isVectorToScalar() const {
683*0fca6ea1SDimitry Andric   return getOpcode() == VPInstruction::ExtractFromEnd ||
684*0fca6ea1SDimitry Andric          getOpcode() == VPInstruction::ComputeReductionResult;
685*0fca6ea1SDimitry Andric }
686*0fca6ea1SDimitry Andric 
687*0fca6ea1SDimitry Andric bool VPInstruction::isSingleScalar() const {
688*0fca6ea1SDimitry Andric   return getOpcode() == VPInstruction::ResumePhi;
689*0fca6ea1SDimitry Andric }
690*0fca6ea1SDimitry Andric 
6915f757f3fSDimitry Andric #if !defined(NDEBUG)
6925f757f3fSDimitry Andric bool VPInstruction::isFPMathOp() const {
6935f757f3fSDimitry Andric   // Inspired by FPMathOperator::classof. Notable differences are that we don't
6945f757f3fSDimitry Andric   // support Call, PHI and Select opcodes here yet.
6955f757f3fSDimitry Andric   return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
6965f757f3fSDimitry Andric          Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
6975f757f3fSDimitry Andric          Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
6985f757f3fSDimitry Andric          Opcode == Instruction::FCmp || Opcode == Instruction::Select;
6995f757f3fSDimitry Andric }
7005f757f3fSDimitry Andric #endif
7015f757f3fSDimitry Andric 
70281ad6265SDimitry Andric void VPInstruction::execute(VPTransformState &State) {
70381ad6265SDimitry Andric   assert(!State.Instance && "VPInstruction executing an Instance");
70481ad6265SDimitry Andric   IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
7055f757f3fSDimitry Andric   assert((hasFastMathFlags() == isFPMathOp() ||
7065f757f3fSDimitry Andric           getOpcode() == Instruction::Select) &&
7075f757f3fSDimitry Andric          "Recipe not a FPMathOp but has fast-math flags?");
7085f757f3fSDimitry Andric   if (hasFastMathFlags())
7095f757f3fSDimitry Andric     State.Builder.setFastMathFlags(getFastMathFlags());
710*0fca6ea1SDimitry Andric   State.setDebugLocFrom(getDebugLoc());
711*0fca6ea1SDimitry Andric   bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
712*0fca6ea1SDimitry Andric                                    (vputils::onlyFirstLaneUsed(this) ||
713*0fca6ea1SDimitry Andric                                     isVectorToScalar() || isSingleScalar());
714*0fca6ea1SDimitry Andric   bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
715*0fca6ea1SDimitry Andric   bool OnlyFirstPartUsed = vputils::onlyFirstPartUsed(this);
71606c3fb27SDimitry Andric   for (unsigned Part = 0; Part < State.UF; ++Part) {
717*0fca6ea1SDimitry Andric     if (GeneratesPerAllLanes) {
718*0fca6ea1SDimitry Andric       for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
719*0fca6ea1SDimitry Andric            Lane != NumLanes; ++Lane) {
720*0fca6ea1SDimitry Andric         Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
721*0fca6ea1SDimitry Andric         assert(GeneratedValue && "generatePerLane must produce a value");
722*0fca6ea1SDimitry Andric         State.set(this, GeneratedValue, VPIteration(Part, Lane));
723*0fca6ea1SDimitry Andric       }
724*0fca6ea1SDimitry Andric       continue;
725*0fca6ea1SDimitry Andric     }
726*0fca6ea1SDimitry Andric 
727*0fca6ea1SDimitry Andric     if (Part != 0 && OnlyFirstPartUsed && hasResult()) {
728*0fca6ea1SDimitry Andric       Value *Part0 = State.get(this, 0, /*IsScalar*/ GeneratesPerFirstLaneOnly);
729*0fca6ea1SDimitry Andric       State.set(this, Part0, Part,
730*0fca6ea1SDimitry Andric                 /*IsScalar*/ GeneratesPerFirstLaneOnly);
731*0fca6ea1SDimitry Andric       continue;
732*0fca6ea1SDimitry Andric     }
733*0fca6ea1SDimitry Andric 
734*0fca6ea1SDimitry Andric     Value *GeneratedValue = generatePerPart(State, Part);
73506c3fb27SDimitry Andric     if (!hasResult())
73606c3fb27SDimitry Andric       continue;
737*0fca6ea1SDimitry Andric     assert(GeneratedValue && "generatePerPart must produce a value");
738*0fca6ea1SDimitry Andric     assert((GeneratedValue->getType()->isVectorTy() ==
739*0fca6ea1SDimitry Andric                 !GeneratesPerFirstLaneOnly ||
740*0fca6ea1SDimitry Andric             State.VF.isScalar()) &&
741*0fca6ea1SDimitry Andric            "scalar value but not only first lane defined");
742*0fca6ea1SDimitry Andric     State.set(this, GeneratedValue, Part,
743*0fca6ea1SDimitry Andric               /*IsScalar*/ GeneratesPerFirstLaneOnly);
74406c3fb27SDimitry Andric   }
74581ad6265SDimitry Andric }
74681ad6265SDimitry Andric 
747*0fca6ea1SDimitry Andric bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
748*0fca6ea1SDimitry Andric   assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
749*0fca6ea1SDimitry Andric   if (Instruction::isBinaryOp(getOpcode()))
750*0fca6ea1SDimitry Andric     return vputils::onlyFirstLaneUsed(this);
751*0fca6ea1SDimitry Andric 
752*0fca6ea1SDimitry Andric   switch (getOpcode()) {
753*0fca6ea1SDimitry Andric   default:
754*0fca6ea1SDimitry Andric     return false;
755*0fca6ea1SDimitry Andric   case Instruction::ICmp:
756*0fca6ea1SDimitry Andric   case VPInstruction::PtrAdd:
757*0fca6ea1SDimitry Andric     // TODO: Cover additional opcodes.
758*0fca6ea1SDimitry Andric     return vputils::onlyFirstLaneUsed(this);
759*0fca6ea1SDimitry Andric   case VPInstruction::ActiveLaneMask:
760*0fca6ea1SDimitry Andric   case VPInstruction::ExplicitVectorLength:
761*0fca6ea1SDimitry Andric   case VPInstruction::CalculateTripCountMinusVF:
762*0fca6ea1SDimitry Andric   case VPInstruction::CanonicalIVIncrementForPart:
763*0fca6ea1SDimitry Andric   case VPInstruction::BranchOnCount:
764*0fca6ea1SDimitry Andric   case VPInstruction::BranchOnCond:
765*0fca6ea1SDimitry Andric   case VPInstruction::ResumePhi:
766*0fca6ea1SDimitry Andric     return true;
767*0fca6ea1SDimitry Andric   };
768*0fca6ea1SDimitry Andric   llvm_unreachable("switch should return");
769*0fca6ea1SDimitry Andric }
770*0fca6ea1SDimitry Andric 
771*0fca6ea1SDimitry Andric bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const {
772*0fca6ea1SDimitry Andric   assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
773*0fca6ea1SDimitry Andric   if (Instruction::isBinaryOp(getOpcode()))
774*0fca6ea1SDimitry Andric     return vputils::onlyFirstPartUsed(this);
775*0fca6ea1SDimitry Andric 
776*0fca6ea1SDimitry Andric   switch (getOpcode()) {
777*0fca6ea1SDimitry Andric   default:
778*0fca6ea1SDimitry Andric     return false;
779*0fca6ea1SDimitry Andric   case Instruction::ICmp:
780*0fca6ea1SDimitry Andric   case Instruction::Select:
781*0fca6ea1SDimitry Andric     return vputils::onlyFirstPartUsed(this);
782*0fca6ea1SDimitry Andric   case VPInstruction::BranchOnCount:
783*0fca6ea1SDimitry Andric   case VPInstruction::BranchOnCond:
784*0fca6ea1SDimitry Andric   case VPInstruction::CanonicalIVIncrementForPart:
785*0fca6ea1SDimitry Andric     return true;
786*0fca6ea1SDimitry Andric   };
787*0fca6ea1SDimitry Andric   llvm_unreachable("switch should return");
788*0fca6ea1SDimitry Andric }
789*0fca6ea1SDimitry Andric 
79081ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
79181ad6265SDimitry Andric void VPInstruction::dump() const {
79281ad6265SDimitry Andric   VPSlotTracker SlotTracker(getParent()->getPlan());
79381ad6265SDimitry Andric   print(dbgs(), "", SlotTracker);
79481ad6265SDimitry Andric }
79581ad6265SDimitry Andric 
79681ad6265SDimitry Andric void VPInstruction::print(raw_ostream &O, const Twine &Indent,
79781ad6265SDimitry Andric                           VPSlotTracker &SlotTracker) const {
79881ad6265SDimitry Andric   O << Indent << "EMIT ";
79981ad6265SDimitry Andric 
80081ad6265SDimitry Andric   if (hasResult()) {
80181ad6265SDimitry Andric     printAsOperand(O, SlotTracker);
80281ad6265SDimitry Andric     O << " = ";
80381ad6265SDimitry Andric   }
80481ad6265SDimitry Andric 
80581ad6265SDimitry Andric   switch (getOpcode()) {
80681ad6265SDimitry Andric   case VPInstruction::Not:
80781ad6265SDimitry Andric     O << "not";
80881ad6265SDimitry Andric     break;
80981ad6265SDimitry Andric   case VPInstruction::SLPLoad:
81081ad6265SDimitry Andric     O << "combined load";
81181ad6265SDimitry Andric     break;
81281ad6265SDimitry Andric   case VPInstruction::SLPStore:
81381ad6265SDimitry Andric     O << "combined store";
81481ad6265SDimitry Andric     break;
81581ad6265SDimitry Andric   case VPInstruction::ActiveLaneMask:
81681ad6265SDimitry Andric     O << "active lane mask";
81781ad6265SDimitry Andric     break;
818*0fca6ea1SDimitry Andric   case VPInstruction::ResumePhi:
819*0fca6ea1SDimitry Andric     O << "resume-phi";
820*0fca6ea1SDimitry Andric     break;
821*0fca6ea1SDimitry Andric   case VPInstruction::ExplicitVectorLength:
822*0fca6ea1SDimitry Andric     O << "EXPLICIT-VECTOR-LENGTH";
823*0fca6ea1SDimitry Andric     break;
82481ad6265SDimitry Andric   case VPInstruction::FirstOrderRecurrenceSplice:
82581ad6265SDimitry Andric     O << "first-order splice";
82681ad6265SDimitry Andric     break;
82781ad6265SDimitry Andric   case VPInstruction::BranchOnCond:
82881ad6265SDimitry Andric     O << "branch-on-cond";
82981ad6265SDimitry Andric     break;
83006c3fb27SDimitry Andric   case VPInstruction::CalculateTripCountMinusVF:
83106c3fb27SDimitry Andric     O << "TC > VF ? TC - VF : 0";
83206c3fb27SDimitry Andric     break;
833753f127fSDimitry Andric   case VPInstruction::CanonicalIVIncrementForPart:
834753f127fSDimitry Andric     O << "VF * Part +";
835753f127fSDimitry Andric     break;
83681ad6265SDimitry Andric   case VPInstruction::BranchOnCount:
83781ad6265SDimitry Andric     O << "branch-on-count";
83881ad6265SDimitry Andric     break;
839*0fca6ea1SDimitry Andric   case VPInstruction::ExtractFromEnd:
840*0fca6ea1SDimitry Andric     O << "extract-from-end";
841*0fca6ea1SDimitry Andric     break;
8421db9f3b2SDimitry Andric   case VPInstruction::ComputeReductionResult:
8431db9f3b2SDimitry Andric     O << "compute-reduction-result";
8441db9f3b2SDimitry Andric     break;
845*0fca6ea1SDimitry Andric   case VPInstruction::LogicalAnd:
846*0fca6ea1SDimitry Andric     O << "logical-and";
847*0fca6ea1SDimitry Andric     break;
848*0fca6ea1SDimitry Andric   case VPInstruction::PtrAdd:
849*0fca6ea1SDimitry Andric     O << "ptradd";
850*0fca6ea1SDimitry Andric     break;
85181ad6265SDimitry Andric   default:
85281ad6265SDimitry Andric     O << Instruction::getOpcodeName(getOpcode());
85381ad6265SDimitry Andric   }
85481ad6265SDimitry Andric 
8555f757f3fSDimitry Andric   printFlags(O);
8565f757f3fSDimitry Andric   printOperands(O, SlotTracker);
85781ad6265SDimitry Andric 
8585f757f3fSDimitry Andric   if (auto DL = getDebugLoc()) {
85981ad6265SDimitry Andric     O << ", !dbg ";
86081ad6265SDimitry Andric     DL.print(O);
86181ad6265SDimitry Andric   }
86281ad6265SDimitry Andric }
86381ad6265SDimitry Andric #endif
86481ad6265SDimitry Andric 
865bdd1243dSDimitry Andric void VPWidenCallRecipe::execute(VPTransformState &State) {
86606c3fb27SDimitry Andric   assert(State.VF.isVector() && "not widening");
867*0fca6ea1SDimitry Andric   Function *CalledScalarFn = getCalledScalarFunction();
868*0fca6ea1SDimitry Andric   assert(!isDbgInfoIntrinsic(CalledScalarFn->getIntrinsicID()) &&
869bdd1243dSDimitry Andric          "DbgInfoIntrinsic should have been dropped during VPlan construction");
8707a6dacacSDimitry Andric   State.setDebugLocFrom(getDebugLoc());
871bdd1243dSDimitry Andric 
872647cbc5dSDimitry Andric   bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
8735f757f3fSDimitry Andric   FunctionType *VFTy = nullptr;
8745f757f3fSDimitry Andric   if (Variant)
8755f757f3fSDimitry Andric     VFTy = Variant->getFunctionType();
876bdd1243dSDimitry Andric   for (unsigned Part = 0; Part < State.UF; ++Part) {
87706c3fb27SDimitry Andric     SmallVector<Type *, 2> TysForDecl;
87806c3fb27SDimitry Andric     // Add return type if intrinsic is overloaded on it.
879647cbc5dSDimitry Andric     if (UseIntrinsic &&
880647cbc5dSDimitry Andric         isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
881*0fca6ea1SDimitry Andric       TysForDecl.push_back(VectorType::get(
882*0fca6ea1SDimitry Andric           CalledScalarFn->getReturnType()->getScalarType(), State.VF));
883bdd1243dSDimitry Andric     SmallVector<Value *, 4> Args;
884*0fca6ea1SDimitry Andric     for (const auto &I : enumerate(arg_operands())) {
885bdd1243dSDimitry Andric       // Some intrinsics have a scalar argument - don't replace it with a
886bdd1243dSDimitry Andric       // vector.
887bdd1243dSDimitry Andric       Value *Arg;
888b3edf446SDimitry Andric       if (UseIntrinsic &&
889b3edf446SDimitry Andric           isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
890bdd1243dSDimitry Andric         Arg = State.get(I.value(), VPIteration(0, 0));
891b3edf446SDimitry Andric       // Some vectorized function variants may also take a scalar argument,
892b3edf446SDimitry Andric       // e.g. linear parameters for pointers. This needs to be the scalar value
893b3edf446SDimitry Andric       // from the start of the respective part when interleaving.
894b3edf446SDimitry Andric       else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
895b3edf446SDimitry Andric         Arg = State.get(I.value(), VPIteration(Part, 0));
8965f757f3fSDimitry Andric       else
8975f757f3fSDimitry Andric         Arg = State.get(I.value(), Part);
898647cbc5dSDimitry Andric       if (UseIntrinsic &&
899647cbc5dSDimitry Andric           isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
900bdd1243dSDimitry Andric         TysForDecl.push_back(Arg->getType());
901bdd1243dSDimitry Andric       Args.push_back(Arg);
902bdd1243dSDimitry Andric     }
903bdd1243dSDimitry Andric 
904bdd1243dSDimitry Andric     Function *VectorF;
905647cbc5dSDimitry Andric     if (UseIntrinsic) {
906bdd1243dSDimitry Andric       // Use vector version of the intrinsic.
907bdd1243dSDimitry Andric       Module *M = State.Builder.GetInsertBlock()->getModule();
908bdd1243dSDimitry Andric       VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
909bdd1243dSDimitry Andric       assert(VectorF && "Can't retrieve vector intrinsic.");
910bdd1243dSDimitry Andric     } else {
911bdd1243dSDimitry Andric #ifndef NDEBUG
91206c3fb27SDimitry Andric       assert(Variant != nullptr && "Can't create vector function.");
913bdd1243dSDimitry Andric #endif
91406c3fb27SDimitry Andric       VectorF = Variant;
915bdd1243dSDimitry Andric     }
91606c3fb27SDimitry Andric 
917*0fca6ea1SDimitry Andric     auto *CI = cast_or_null<CallInst>(getUnderlyingInstr());
918bdd1243dSDimitry Andric     SmallVector<OperandBundleDef, 1> OpBundles;
919*0fca6ea1SDimitry Andric     if (CI)
920*0fca6ea1SDimitry Andric       CI->getOperandBundlesAsDefs(OpBundles);
921*0fca6ea1SDimitry Andric 
922bdd1243dSDimitry Andric     CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
923bdd1243dSDimitry Andric 
924bdd1243dSDimitry Andric     if (isa<FPMathOperator>(V))
925*0fca6ea1SDimitry Andric       V->copyFastMathFlags(CI);
926bdd1243dSDimitry Andric 
927*0fca6ea1SDimitry Andric     if (!V->getType()->isVoidTy())
928bdd1243dSDimitry Andric       State.set(this, V, Part);
929*0fca6ea1SDimitry Andric     State.addMetadata(V, CI);
930bdd1243dSDimitry Andric   }
931bdd1243dSDimitry Andric }
932bdd1243dSDimitry Andric 
93381ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
93481ad6265SDimitry Andric void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
93581ad6265SDimitry Andric                               VPSlotTracker &SlotTracker) const {
93681ad6265SDimitry Andric   O << Indent << "WIDEN-CALL ";
93781ad6265SDimitry Andric 
938*0fca6ea1SDimitry Andric   Function *CalledFn = getCalledScalarFunction();
939*0fca6ea1SDimitry Andric   if (CalledFn->getReturnType()->isVoidTy())
94081ad6265SDimitry Andric     O << "void ";
94181ad6265SDimitry Andric   else {
94281ad6265SDimitry Andric     printAsOperand(O, SlotTracker);
94381ad6265SDimitry Andric     O << " = ";
94481ad6265SDimitry Andric   }
94581ad6265SDimitry Andric 
946*0fca6ea1SDimitry Andric   O << "call @" << CalledFn->getName() << "(";
947*0fca6ea1SDimitry Andric   interleaveComma(arg_operands(), O, [&O, &SlotTracker](VPValue *Op) {
948*0fca6ea1SDimitry Andric     Op->printAsOperand(O, SlotTracker);
949*0fca6ea1SDimitry Andric   });
95081ad6265SDimitry Andric   O << ")";
951bdd1243dSDimitry Andric 
952bdd1243dSDimitry Andric   if (VectorIntrinsicID)
953bdd1243dSDimitry Andric     O << " (using vector intrinsic)";
95406c3fb27SDimitry Andric   else {
95506c3fb27SDimitry Andric     O << " (using library function";
95606c3fb27SDimitry Andric     if (Variant->hasName())
95706c3fb27SDimitry Andric       O << ": " << Variant->getName();
95806c3fb27SDimitry Andric     O << ")";
95906c3fb27SDimitry Andric   }
96081ad6265SDimitry Andric }
96181ad6265SDimitry Andric 
96281ad6265SDimitry Andric void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
96381ad6265SDimitry Andric                                 VPSlotTracker &SlotTracker) const {
96481ad6265SDimitry Andric   O << Indent << "WIDEN-SELECT ";
96581ad6265SDimitry Andric   printAsOperand(O, SlotTracker);
96681ad6265SDimitry Andric   O << " = select ";
96781ad6265SDimitry Andric   getOperand(0)->printAsOperand(O, SlotTracker);
96881ad6265SDimitry Andric   O << ", ";
96981ad6265SDimitry Andric   getOperand(1)->printAsOperand(O, SlotTracker);
97081ad6265SDimitry Andric   O << ", ";
97181ad6265SDimitry Andric   getOperand(2)->printAsOperand(O, SlotTracker);
97206c3fb27SDimitry Andric   O << (isInvariantCond() ? " (condition is loop invariant)" : "");
97381ad6265SDimitry Andric }
974753f127fSDimitry Andric #endif
97581ad6265SDimitry Andric 
976753f127fSDimitry Andric void VPWidenSelectRecipe::execute(VPTransformState &State) {
9775f757f3fSDimitry Andric   State.setDebugLocFrom(getDebugLoc());
978753f127fSDimitry Andric 
979753f127fSDimitry Andric   // The condition can be loop invariant but still defined inside the
980753f127fSDimitry Andric   // loop. This means that we can't just use the original 'cond' value.
981753f127fSDimitry Andric   // We have to take the 'vectorized' value and pick the first lane.
982753f127fSDimitry Andric   // Instcombine will make this a no-op.
983753f127fSDimitry Andric   auto *InvarCond =
98406c3fb27SDimitry Andric       isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
985753f127fSDimitry Andric 
986753f127fSDimitry Andric   for (unsigned Part = 0; Part < State.UF; ++Part) {
98706c3fb27SDimitry Andric     Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
988753f127fSDimitry Andric     Value *Op0 = State.get(getOperand(1), Part);
989753f127fSDimitry Andric     Value *Op1 = State.get(getOperand(2), Part);
990753f127fSDimitry Andric     Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
991753f127fSDimitry Andric     State.set(this, Sel, Part);
9925f757f3fSDimitry Andric     State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
993753f127fSDimitry Andric   }
994753f127fSDimitry Andric }
995753f127fSDimitry Andric 
9965f757f3fSDimitry Andric VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
9975f757f3fSDimitry Andric     const FastMathFlags &FMF) {
9985f757f3fSDimitry Andric   AllowReassoc = FMF.allowReassoc();
9995f757f3fSDimitry Andric   NoNaNs = FMF.noNaNs();
10005f757f3fSDimitry Andric   NoInfs = FMF.noInfs();
10015f757f3fSDimitry Andric   NoSignedZeros = FMF.noSignedZeros();
10025f757f3fSDimitry Andric   AllowReciprocal = FMF.allowReciprocal();
10035f757f3fSDimitry Andric   AllowContract = FMF.allowContract();
10045f757f3fSDimitry Andric   ApproxFunc = FMF.approxFunc();
10055f757f3fSDimitry Andric }
10065f757f3fSDimitry Andric 
100706c3fb27SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
100806c3fb27SDimitry Andric void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
100906c3fb27SDimitry Andric   switch (OpType) {
10105f757f3fSDimitry Andric   case OperationType::Cmp:
10115f757f3fSDimitry Andric     O << " " << CmpInst::getPredicateName(getPredicate());
10125f757f3fSDimitry Andric     break;
10135f757f3fSDimitry Andric   case OperationType::DisjointOp:
10145f757f3fSDimitry Andric     if (DisjointFlags.IsDisjoint)
10155f757f3fSDimitry Andric       O << " disjoint";
10165f757f3fSDimitry Andric     break;
101706c3fb27SDimitry Andric   case OperationType::PossiblyExactOp:
101806c3fb27SDimitry Andric     if (ExactFlags.IsExact)
101906c3fb27SDimitry Andric       O << " exact";
102006c3fb27SDimitry Andric     break;
102106c3fb27SDimitry Andric   case OperationType::OverflowingBinOp:
102206c3fb27SDimitry Andric     if (WrapFlags.HasNUW)
102306c3fb27SDimitry Andric       O << " nuw";
102406c3fb27SDimitry Andric     if (WrapFlags.HasNSW)
102506c3fb27SDimitry Andric       O << " nsw";
102606c3fb27SDimitry Andric     break;
102706c3fb27SDimitry Andric   case OperationType::FPMathOp:
102806c3fb27SDimitry Andric     getFastMathFlags().print(O);
102906c3fb27SDimitry Andric     break;
103006c3fb27SDimitry Andric   case OperationType::GEPOp:
103106c3fb27SDimitry Andric     if (GEPFlags.IsInBounds)
103206c3fb27SDimitry Andric       O << " inbounds";
103306c3fb27SDimitry Andric     break;
10345f757f3fSDimitry Andric   case OperationType::NonNegOp:
10355f757f3fSDimitry Andric     if (NonNegFlags.NonNeg)
10365f757f3fSDimitry Andric       O << " nneg";
10375f757f3fSDimitry Andric     break;
103806c3fb27SDimitry Andric   case OperationType::Other:
103906c3fb27SDimitry Andric     break;
104006c3fb27SDimitry Andric   }
10415f757f3fSDimitry Andric   if (getNumOperands() > 0)
104206c3fb27SDimitry Andric     O << " ";
104306c3fb27SDimitry Andric }
104406c3fb27SDimitry Andric #endif
104506c3fb27SDimitry Andric 
1046753f127fSDimitry Andric void VPWidenRecipe::execute(VPTransformState &State) {
10475f757f3fSDimitry Andric   State.setDebugLocFrom(getDebugLoc());
1048753f127fSDimitry Andric   auto &Builder = State.Builder;
10495f757f3fSDimitry Andric   switch (Opcode) {
1050753f127fSDimitry Andric   case Instruction::Call:
1051753f127fSDimitry Andric   case Instruction::Br:
1052753f127fSDimitry Andric   case Instruction::PHI:
1053753f127fSDimitry Andric   case Instruction::GetElementPtr:
1054753f127fSDimitry Andric   case Instruction::Select:
1055753f127fSDimitry Andric     llvm_unreachable("This instruction is handled by a different recipe.");
1056753f127fSDimitry Andric   case Instruction::UDiv:
1057753f127fSDimitry Andric   case Instruction::SDiv:
1058753f127fSDimitry Andric   case Instruction::SRem:
1059753f127fSDimitry Andric   case Instruction::URem:
1060753f127fSDimitry Andric   case Instruction::Add:
1061753f127fSDimitry Andric   case Instruction::FAdd:
1062753f127fSDimitry Andric   case Instruction::Sub:
1063753f127fSDimitry Andric   case Instruction::FSub:
1064753f127fSDimitry Andric   case Instruction::FNeg:
1065753f127fSDimitry Andric   case Instruction::Mul:
1066753f127fSDimitry Andric   case Instruction::FMul:
1067753f127fSDimitry Andric   case Instruction::FDiv:
1068753f127fSDimitry Andric   case Instruction::FRem:
1069753f127fSDimitry Andric   case Instruction::Shl:
1070753f127fSDimitry Andric   case Instruction::LShr:
1071753f127fSDimitry Andric   case Instruction::AShr:
1072753f127fSDimitry Andric   case Instruction::And:
1073753f127fSDimitry Andric   case Instruction::Or:
1074753f127fSDimitry Andric   case Instruction::Xor: {
1075753f127fSDimitry Andric     // Just widen unops and binops.
1076753f127fSDimitry Andric     for (unsigned Part = 0; Part < State.UF; ++Part) {
1077753f127fSDimitry Andric       SmallVector<Value *, 2> Ops;
1078753f127fSDimitry Andric       for (VPValue *VPOp : operands())
1079753f127fSDimitry Andric         Ops.push_back(State.get(VPOp, Part));
1080753f127fSDimitry Andric 
10815f757f3fSDimitry Andric       Value *V = Builder.CreateNAryOp(Opcode, Ops);
1082753f127fSDimitry Andric 
108306c3fb27SDimitry Andric       if (auto *VecOp = dyn_cast<Instruction>(V))
108406c3fb27SDimitry Andric         setFlags(VecOp);
1085753f127fSDimitry Andric 
1086753f127fSDimitry Andric       // Use this vector value for all users of the original instruction.
1087753f127fSDimitry Andric       State.set(this, V, Part);
10885f757f3fSDimitry Andric       State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1089753f127fSDimitry Andric     }
1090753f127fSDimitry Andric 
1091753f127fSDimitry Andric     break;
1092753f127fSDimitry Andric   }
1093753f127fSDimitry Andric   case Instruction::Freeze: {
1094753f127fSDimitry Andric     for (unsigned Part = 0; Part < State.UF; ++Part) {
1095753f127fSDimitry Andric       Value *Op = State.get(getOperand(0), Part);
1096753f127fSDimitry Andric 
1097753f127fSDimitry Andric       Value *Freeze = Builder.CreateFreeze(Op);
1098753f127fSDimitry Andric       State.set(this, Freeze, Part);
1099753f127fSDimitry Andric     }
1100753f127fSDimitry Andric     break;
1101753f127fSDimitry Andric   }
1102753f127fSDimitry Andric   case Instruction::ICmp:
1103753f127fSDimitry Andric   case Instruction::FCmp: {
1104753f127fSDimitry Andric     // Widen compares. Generate vector compares.
11055f757f3fSDimitry Andric     bool FCmp = Opcode == Instruction::FCmp;
1106753f127fSDimitry Andric     for (unsigned Part = 0; Part < State.UF; ++Part) {
1107753f127fSDimitry Andric       Value *A = State.get(getOperand(0), Part);
1108753f127fSDimitry Andric       Value *B = State.get(getOperand(1), Part);
1109753f127fSDimitry Andric       Value *C = nullptr;
1110753f127fSDimitry Andric       if (FCmp) {
1111753f127fSDimitry Andric         // Propagate fast math flags.
1112753f127fSDimitry Andric         IRBuilder<>::FastMathFlagGuard FMFG(Builder);
11135f757f3fSDimitry Andric         if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
11145f757f3fSDimitry Andric           Builder.setFastMathFlags(I->getFastMathFlags());
11155f757f3fSDimitry Andric         C = Builder.CreateFCmp(getPredicate(), A, B);
1116753f127fSDimitry Andric       } else {
11175f757f3fSDimitry Andric         C = Builder.CreateICmp(getPredicate(), A, B);
1118753f127fSDimitry Andric       }
1119753f127fSDimitry Andric       State.set(this, C, Part);
11205f757f3fSDimitry Andric       State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1121753f127fSDimitry Andric     }
1122753f127fSDimitry Andric 
1123753f127fSDimitry Andric     break;
1124753f127fSDimitry Andric   }
1125753f127fSDimitry Andric   default:
1126753f127fSDimitry Andric     // This instruction is not vectorized by simple widening.
11275f757f3fSDimitry Andric     LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
11285f757f3fSDimitry Andric                       << Instruction::getOpcodeName(Opcode));
1129753f127fSDimitry Andric     llvm_unreachable("Unhandled instruction!");
1130753f127fSDimitry Andric   } // end of switch.
11315f757f3fSDimitry Andric 
11325f757f3fSDimitry Andric #if !defined(NDEBUG)
11335f757f3fSDimitry Andric   // Verify that VPlan type inference results agree with the type of the
11345f757f3fSDimitry Andric   // generated values.
11355f757f3fSDimitry Andric   for (unsigned Part = 0; Part < State.UF; ++Part) {
11365f757f3fSDimitry Andric     assert(VectorType::get(State.TypeAnalysis.inferScalarType(this),
11375f757f3fSDimitry Andric                            State.VF) == State.get(this, Part)->getType() &&
11385f757f3fSDimitry Andric            "inferred type and type from generated instructions do not match");
1139753f127fSDimitry Andric   }
11405f757f3fSDimitry Andric #endif
11415f757f3fSDimitry Andric }
11425f757f3fSDimitry Andric 
1143753f127fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
114481ad6265SDimitry Andric void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
114581ad6265SDimitry Andric                           VPSlotTracker &SlotTracker) const {
114681ad6265SDimitry Andric   O << Indent << "WIDEN ";
114781ad6265SDimitry Andric   printAsOperand(O, SlotTracker);
11485f757f3fSDimitry Andric   O << " = " << Instruction::getOpcodeName(Opcode);
114906c3fb27SDimitry Andric   printFlags(O);
115081ad6265SDimitry Andric   printOperands(O, SlotTracker);
115181ad6265SDimitry Andric }
115206c3fb27SDimitry Andric #endif
115306c3fb27SDimitry Andric 
115406c3fb27SDimitry Andric void VPWidenCastRecipe::execute(VPTransformState &State) {
11555f757f3fSDimitry Andric   State.setDebugLocFrom(getDebugLoc());
115606c3fb27SDimitry Andric   auto &Builder = State.Builder;
115706c3fb27SDimitry Andric   /// Vectorize casts.
115806c3fb27SDimitry Andric   assert(State.VF.isVector() && "Not vectorizing?");
115906c3fb27SDimitry Andric   Type *DestTy = VectorType::get(getResultType(), State.VF);
11605f757f3fSDimitry Andric   VPValue *Op = getOperand(0);
116106c3fb27SDimitry Andric   for (unsigned Part = 0; Part < State.UF; ++Part) {
11625f757f3fSDimitry Andric     if (Part > 0 && Op->isLiveIn()) {
11635f757f3fSDimitry Andric       // FIXME: Remove once explicit unrolling is implemented using VPlan.
11645f757f3fSDimitry Andric       State.set(this, State.get(this, 0), Part);
11655f757f3fSDimitry Andric       continue;
11665f757f3fSDimitry Andric     }
11675f757f3fSDimitry Andric     Value *A = State.get(Op, Part);
116806c3fb27SDimitry Andric     Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
116906c3fb27SDimitry Andric     State.set(this, Cast, Part);
11705f757f3fSDimitry Andric     State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
117106c3fb27SDimitry Andric   }
117206c3fb27SDimitry Andric }
117306c3fb27SDimitry Andric 
117406c3fb27SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
117506c3fb27SDimitry Andric void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
117606c3fb27SDimitry Andric                               VPSlotTracker &SlotTracker) const {
117706c3fb27SDimitry Andric   O << Indent << "WIDEN-CAST ";
117806c3fb27SDimitry Andric   printAsOperand(O, SlotTracker);
117906c3fb27SDimitry Andric   O << " = " << Instruction::getOpcodeName(Opcode) << " ";
11805f757f3fSDimitry Andric   printFlags(O);
118106c3fb27SDimitry Andric   printOperands(O, SlotTracker);
118206c3fb27SDimitry Andric   O << " to " << *getResultType();
118306c3fb27SDimitry Andric }
11845f757f3fSDimitry Andric #endif
118581ad6265SDimitry Andric 
11865f757f3fSDimitry Andric /// This function adds
11875f757f3fSDimitry Andric /// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
11885f757f3fSDimitry Andric /// to each vector element of Val. The sequence starts at StartIndex.
11895f757f3fSDimitry Andric /// \p Opcode is relevant for FP induction variable.
11905f757f3fSDimitry Andric static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
11915f757f3fSDimitry Andric                             Instruction::BinaryOps BinOp, ElementCount VF,
11925f757f3fSDimitry Andric                             IRBuilderBase &Builder) {
11935f757f3fSDimitry Andric   assert(VF.isVector() && "only vector VFs are supported");
11945f757f3fSDimitry Andric 
11955f757f3fSDimitry Andric   // Create and check the types.
11965f757f3fSDimitry Andric   auto *ValVTy = cast<VectorType>(Val->getType());
11975f757f3fSDimitry Andric   ElementCount VLen = ValVTy->getElementCount();
11985f757f3fSDimitry Andric 
11995f757f3fSDimitry Andric   Type *STy = Val->getType()->getScalarType();
12005f757f3fSDimitry Andric   assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
12015f757f3fSDimitry Andric          "Induction Step must be an integer or FP");
12025f757f3fSDimitry Andric   assert(Step->getType() == STy && "Step has wrong type");
12035f757f3fSDimitry Andric 
12045f757f3fSDimitry Andric   SmallVector<Constant *, 8> Indices;
12055f757f3fSDimitry Andric 
12065f757f3fSDimitry Andric   // Create a vector of consecutive numbers from zero to VF.
12075f757f3fSDimitry Andric   VectorType *InitVecValVTy = ValVTy;
12085f757f3fSDimitry Andric   if (STy->isFloatingPointTy()) {
12095f757f3fSDimitry Andric     Type *InitVecValSTy =
12105f757f3fSDimitry Andric         IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
12115f757f3fSDimitry Andric     InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
12125f757f3fSDimitry Andric   }
12135f757f3fSDimitry Andric   Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
12145f757f3fSDimitry Andric 
12155f757f3fSDimitry Andric   // Splat the StartIdx
12165f757f3fSDimitry Andric   Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);
12175f757f3fSDimitry Andric 
12185f757f3fSDimitry Andric   if (STy->isIntegerTy()) {
12195f757f3fSDimitry Andric     InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
12205f757f3fSDimitry Andric     Step = Builder.CreateVectorSplat(VLen, Step);
12215f757f3fSDimitry Andric     assert(Step->getType() == Val->getType() && "Invalid step vec");
12225f757f3fSDimitry Andric     // FIXME: The newly created binary instructions should contain nsw/nuw
12235f757f3fSDimitry Andric     // flags, which can be found from the original scalar operations.
12245f757f3fSDimitry Andric     Step = Builder.CreateMul(InitVec, Step);
12255f757f3fSDimitry Andric     return Builder.CreateAdd(Val, Step, "induction");
12265f757f3fSDimitry Andric   }
12275f757f3fSDimitry Andric 
12285f757f3fSDimitry Andric   // Floating point induction.
12295f757f3fSDimitry Andric   assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
12305f757f3fSDimitry Andric          "Binary Opcode should be specified for FP induction");
12315f757f3fSDimitry Andric   InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
12325f757f3fSDimitry Andric   InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);
12335f757f3fSDimitry Andric 
12345f757f3fSDimitry Andric   Step = Builder.CreateVectorSplat(VLen, Step);
12355f757f3fSDimitry Andric   Value *MulOp = Builder.CreateFMul(InitVec, Step);
12365f757f3fSDimitry Andric   return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
12375f757f3fSDimitry Andric }
12385f757f3fSDimitry Andric 
12395f757f3fSDimitry Andric /// A helper function that returns an integer or floating-point constant with
12405f757f3fSDimitry Andric /// value C.
12415f757f3fSDimitry Andric static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
12425f757f3fSDimitry Andric   return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
12435f757f3fSDimitry Andric                            : ConstantFP::get(Ty, C);
12445f757f3fSDimitry Andric }
12455f757f3fSDimitry Andric 
12465f757f3fSDimitry Andric static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,
12475f757f3fSDimitry Andric                                   ElementCount VF) {
12485f757f3fSDimitry Andric   assert(FTy->isFloatingPointTy() && "Expected floating point type!");
12495f757f3fSDimitry Andric   Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
12505f757f3fSDimitry Andric   Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
12515f757f3fSDimitry Andric   return B.CreateUIToFP(RuntimeVF, FTy);
12525f757f3fSDimitry Andric }
12535f757f3fSDimitry Andric 
12545f757f3fSDimitry Andric void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
12555f757f3fSDimitry Andric   assert(!State.Instance && "Int or FP induction being replicated.");
12565f757f3fSDimitry Andric 
12575f757f3fSDimitry Andric   Value *Start = getStartValue()->getLiveInIRValue();
12585f757f3fSDimitry Andric   const InductionDescriptor &ID = getInductionDescriptor();
12595f757f3fSDimitry Andric   TruncInst *Trunc = getTruncInst();
12605f757f3fSDimitry Andric   IRBuilderBase &Builder = State.Builder;
12615f757f3fSDimitry Andric   assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
12625f757f3fSDimitry Andric   assert(State.VF.isVector() && "must have vector VF");
12635f757f3fSDimitry Andric 
12645f757f3fSDimitry Andric   // The value from the original loop to which we are mapping the new induction
12655f757f3fSDimitry Andric   // variable.
12665f757f3fSDimitry Andric   Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
12675f757f3fSDimitry Andric 
12685f757f3fSDimitry Andric   // Fast-math-flags propagate from the original induction instruction.
12695f757f3fSDimitry Andric   IRBuilder<>::FastMathFlagGuard FMFG(Builder);
12705f757f3fSDimitry Andric   if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
12715f757f3fSDimitry Andric     Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
12725f757f3fSDimitry Andric 
12735f757f3fSDimitry Andric   // Now do the actual transformations, and start with fetching the step value.
12745f757f3fSDimitry Andric   Value *Step = State.get(getStepValue(), VPIteration(0, 0));
12755f757f3fSDimitry Andric 
12765f757f3fSDimitry Andric   assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
12775f757f3fSDimitry Andric          "Expected either an induction phi-node or a truncate of it!");
12785f757f3fSDimitry Andric 
12795f757f3fSDimitry Andric   // Construct the initial value of the vector IV in the vector loop preheader
12805f757f3fSDimitry Andric   auto CurrIP = Builder.saveIP();
12815f757f3fSDimitry Andric   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
12825f757f3fSDimitry Andric   Builder.SetInsertPoint(VectorPH->getTerminator());
12835f757f3fSDimitry Andric   if (isa<TruncInst>(EntryVal)) {
12845f757f3fSDimitry Andric     assert(Start->getType()->isIntegerTy() &&
12855f757f3fSDimitry Andric            "Truncation requires an integer type");
12865f757f3fSDimitry Andric     auto *TruncType = cast<IntegerType>(EntryVal->getType());
12875f757f3fSDimitry Andric     Step = Builder.CreateTrunc(Step, TruncType);
12885f757f3fSDimitry Andric     Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
12895f757f3fSDimitry Andric   }
12905f757f3fSDimitry Andric 
12915f757f3fSDimitry Andric   Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
12925f757f3fSDimitry Andric   Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
12935f757f3fSDimitry Andric   Value *SteppedStart = getStepVector(
12945f757f3fSDimitry Andric       SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);
12955f757f3fSDimitry Andric 
12965f757f3fSDimitry Andric   // We create vector phi nodes for both integer and floating-point induction
12975f757f3fSDimitry Andric   // variables. Here, we determine the kind of arithmetic we will perform.
12985f757f3fSDimitry Andric   Instruction::BinaryOps AddOp;
12995f757f3fSDimitry Andric   Instruction::BinaryOps MulOp;
13005f757f3fSDimitry Andric   if (Step->getType()->isIntegerTy()) {
13015f757f3fSDimitry Andric     AddOp = Instruction::Add;
13025f757f3fSDimitry Andric     MulOp = Instruction::Mul;
13035f757f3fSDimitry Andric   } else {
13045f757f3fSDimitry Andric     AddOp = ID.getInductionOpcode();
13055f757f3fSDimitry Andric     MulOp = Instruction::FMul;
13065f757f3fSDimitry Andric   }
13075f757f3fSDimitry Andric 
13085f757f3fSDimitry Andric   // Multiply the vectorization factor by the step using integer or
13095f757f3fSDimitry Andric   // floating-point arithmetic as appropriate.
13105f757f3fSDimitry Andric   Type *StepType = Step->getType();
13115f757f3fSDimitry Andric   Value *RuntimeVF;
13125f757f3fSDimitry Andric   if (Step->getType()->isFloatingPointTy())
13135f757f3fSDimitry Andric     RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
13145f757f3fSDimitry Andric   else
13155f757f3fSDimitry Andric     RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
13165f757f3fSDimitry Andric   Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
13175f757f3fSDimitry Andric 
13185f757f3fSDimitry Andric   // Create a vector splat to use in the induction update.
13195f757f3fSDimitry Andric   //
13205f757f3fSDimitry Andric   // FIXME: If the step is non-constant, we create the vector splat with
13215f757f3fSDimitry Andric   //        IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
13225f757f3fSDimitry Andric   //        handle a constant vector splat.
13235f757f3fSDimitry Andric   Value *SplatVF = isa<Constant>(Mul)
13245f757f3fSDimitry Andric                        ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
13255f757f3fSDimitry Andric                        : Builder.CreateVectorSplat(State.VF, Mul);
13265f757f3fSDimitry Andric   Builder.restoreIP(CurrIP);
13275f757f3fSDimitry Andric 
13285f757f3fSDimitry Andric   // We may need to add the step a number of times, depending on the unroll
13295f757f3fSDimitry Andric   // factor. The last of those goes into the PHI.
13305f757f3fSDimitry Andric   PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
13315f757f3fSDimitry Andric   VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
13325f757f3fSDimitry Andric   VecInd->setDebugLoc(EntryVal->getDebugLoc());
13335f757f3fSDimitry Andric   Instruction *LastInduction = VecInd;
13345f757f3fSDimitry Andric   for (unsigned Part = 0; Part < State.UF; ++Part) {
13355f757f3fSDimitry Andric     State.set(this, LastInduction, Part);
13365f757f3fSDimitry Andric 
13375f757f3fSDimitry Andric     if (isa<TruncInst>(EntryVal))
13385f757f3fSDimitry Andric       State.addMetadata(LastInduction, EntryVal);
13395f757f3fSDimitry Andric 
13405f757f3fSDimitry Andric     LastInduction = cast<Instruction>(
13415f757f3fSDimitry Andric         Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
13425f757f3fSDimitry Andric     LastInduction->setDebugLoc(EntryVal->getDebugLoc());
13435f757f3fSDimitry Andric   }
13445f757f3fSDimitry Andric 
13455f757f3fSDimitry Andric   LastInduction->setName("vec.ind.next");
13465f757f3fSDimitry Andric   VecInd->addIncoming(SteppedStart, VectorPH);
13475f757f3fSDimitry Andric   // Add induction update using an incorrect block temporarily. The phi node
13485f757f3fSDimitry Andric   // will be fixed after VPlan execution. Note that at this point the latch
13495f757f3fSDimitry Andric   // block cannot be used, as it does not exist yet.
13505f757f3fSDimitry Andric   // TODO: Model increment value in VPlan, by turning the recipe into a
13515f757f3fSDimitry Andric   // multi-def and a subclass of VPHeaderPHIRecipe.
13525f757f3fSDimitry Andric   VecInd->addIncoming(LastInduction, VectorPH);
13535f757f3fSDimitry Andric }
13545f757f3fSDimitry Andric 
13555f757f3fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
135681ad6265SDimitry Andric void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
135781ad6265SDimitry Andric                                           VPSlotTracker &SlotTracker) const {
135881ad6265SDimitry Andric   O << Indent << "WIDEN-INDUCTION";
135981ad6265SDimitry Andric   if (getTruncInst()) {
136081ad6265SDimitry Andric     O << "\\l\"";
136181ad6265SDimitry Andric     O << " +\n" << Indent << "\"  " << VPlanIngredient(IV) << "\\l\"";
136281ad6265SDimitry Andric     O << " +\n" << Indent << "\"  ";
136381ad6265SDimitry Andric     getVPValue(0)->printAsOperand(O, SlotTracker);
136481ad6265SDimitry Andric   } else
136581ad6265SDimitry Andric     O << " " << VPlanIngredient(IV);
136681ad6265SDimitry Andric 
136781ad6265SDimitry Andric   O << ", ";
136881ad6265SDimitry Andric   getStepValue()->printAsOperand(O, SlotTracker);
136981ad6265SDimitry Andric }
137081ad6265SDimitry Andric #endif
137181ad6265SDimitry Andric 
137281ad6265SDimitry Andric bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
137306c3fb27SDimitry Andric   // The step may be defined by a recipe in the preheader (e.g. if it requires
137406c3fb27SDimitry Andric   // SCEV expansion), but for the canonical induction the step is required to be
137506c3fb27SDimitry Andric   // 1, which is represented as live-in.
137606c3fb27SDimitry Andric   if (getStepValue()->getDefiningRecipe())
137706c3fb27SDimitry Andric     return false;
137806c3fb27SDimitry Andric   auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
137981ad6265SDimitry Andric   auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1380*0fca6ea1SDimitry Andric   auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
1381*0fca6ea1SDimitry Andric   return StartC && StartC->isZero() && StepC && StepC->isOne() &&
1382*0fca6ea1SDimitry Andric          getScalarType() == CanIV->getScalarType();
138381ad6265SDimitry Andric }
138481ad6265SDimitry Andric 
1385bdd1243dSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1386bdd1243dSDimitry Andric void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
1387bdd1243dSDimitry Andric                               VPSlotTracker &SlotTracker) const {
1388bdd1243dSDimitry Andric   O << Indent;
1389bdd1243dSDimitry Andric   printAsOperand(O, SlotTracker);
1390bdd1243dSDimitry Andric   O << Indent << "= DERIVED-IV ";
1391bdd1243dSDimitry Andric   getStartValue()->printAsOperand(O, SlotTracker);
1392bdd1243dSDimitry Andric   O << " + ";
1393*0fca6ea1SDimitry Andric   getOperand(1)->printAsOperand(O, SlotTracker);
1394bdd1243dSDimitry Andric   O << " * ";
1395bdd1243dSDimitry Andric   getStepValue()->printAsOperand(O, SlotTracker);
139681ad6265SDimitry Andric }
1397bdd1243dSDimitry Andric #endif
139881ad6265SDimitry Andric 
13995f757f3fSDimitry Andric void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
14005f757f3fSDimitry Andric   // Fast-math-flags propagate from the original induction instruction.
14015f757f3fSDimitry Andric   IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
14025f757f3fSDimitry Andric   if (hasFastMathFlags())
14035f757f3fSDimitry Andric     State.Builder.setFastMathFlags(getFastMathFlags());
14045f757f3fSDimitry Andric 
14055f757f3fSDimitry Andric   /// Compute scalar induction steps. \p ScalarIV is the scalar induction
14065f757f3fSDimitry Andric   /// variable on which to base the steps, \p Step is the size of the step.
14075f757f3fSDimitry Andric 
14085f757f3fSDimitry Andric   Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
14095f757f3fSDimitry Andric   Value *Step = State.get(getStepValue(), VPIteration(0, 0));
14105f757f3fSDimitry Andric   IRBuilderBase &Builder = State.Builder;
14115f757f3fSDimitry Andric 
14125f757f3fSDimitry Andric   // Ensure step has the same type as that of scalar IV.
14135f757f3fSDimitry Andric   Type *BaseIVTy = BaseIV->getType()->getScalarType();
1414*0fca6ea1SDimitry Andric   assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
14155f757f3fSDimitry Andric 
14165f757f3fSDimitry Andric   // We build scalar steps for both integer and floating-point induction
14175f757f3fSDimitry Andric   // variables. Here, we determine the kind of arithmetic we will perform.
14185f757f3fSDimitry Andric   Instruction::BinaryOps AddOp;
14195f757f3fSDimitry Andric   Instruction::BinaryOps MulOp;
14205f757f3fSDimitry Andric   if (BaseIVTy->isIntegerTy()) {
14215f757f3fSDimitry Andric     AddOp = Instruction::Add;
14225f757f3fSDimitry Andric     MulOp = Instruction::Mul;
14235f757f3fSDimitry Andric   } else {
14245f757f3fSDimitry Andric     AddOp = InductionOpcode;
14255f757f3fSDimitry Andric     MulOp = Instruction::FMul;
14265f757f3fSDimitry Andric   }
14275f757f3fSDimitry Andric 
14285f757f3fSDimitry Andric   // Determine the number of scalars we need to generate for each unroll
14295f757f3fSDimitry Andric   // iteration.
14305f757f3fSDimitry Andric   bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
14315f757f3fSDimitry Andric   // Compute the scalar steps and save the results in State.
14325f757f3fSDimitry Andric   Type *IntStepTy =
14335f757f3fSDimitry Andric       IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
14345f757f3fSDimitry Andric   Type *VecIVTy = nullptr;
14355f757f3fSDimitry Andric   Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
14365f757f3fSDimitry Andric   if (!FirstLaneOnly && State.VF.isScalable()) {
14375f757f3fSDimitry Andric     VecIVTy = VectorType::get(BaseIVTy, State.VF);
14385f757f3fSDimitry Andric     UnitStepVec =
14395f757f3fSDimitry Andric         Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
14405f757f3fSDimitry Andric     SplatStep = Builder.CreateVectorSplat(State.VF, Step);
14415f757f3fSDimitry Andric     SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
14425f757f3fSDimitry Andric   }
14435f757f3fSDimitry Andric 
14445f757f3fSDimitry Andric   unsigned StartPart = 0;
14455f757f3fSDimitry Andric   unsigned EndPart = State.UF;
14465f757f3fSDimitry Andric   unsigned StartLane = 0;
14475f757f3fSDimitry Andric   unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
14485f757f3fSDimitry Andric   if (State.Instance) {
14495f757f3fSDimitry Andric     StartPart = State.Instance->Part;
14505f757f3fSDimitry Andric     EndPart = StartPart + 1;
14515f757f3fSDimitry Andric     StartLane = State.Instance->Lane.getKnownLane();
14525f757f3fSDimitry Andric     EndLane = StartLane + 1;
14535f757f3fSDimitry Andric   }
14545f757f3fSDimitry Andric   for (unsigned Part = StartPart; Part < EndPart; ++Part) {
14555f757f3fSDimitry Andric     Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
14565f757f3fSDimitry Andric 
14575f757f3fSDimitry Andric     if (!FirstLaneOnly && State.VF.isScalable()) {
14585f757f3fSDimitry Andric       auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
14595f757f3fSDimitry Andric       auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
14605f757f3fSDimitry Andric       if (BaseIVTy->isFloatingPointTy())
14615f757f3fSDimitry Andric         InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
14625f757f3fSDimitry Andric       auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
14635f757f3fSDimitry Andric       auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
14645f757f3fSDimitry Andric       State.set(this, Add, Part);
14655f757f3fSDimitry Andric       // It's useful to record the lane values too for the known minimum number
14665f757f3fSDimitry Andric       // of elements so we do those below. This improves the code quality when
14675f757f3fSDimitry Andric       // trying to extract the first element, for example.
14685f757f3fSDimitry Andric     }
14695f757f3fSDimitry Andric 
14705f757f3fSDimitry Andric     if (BaseIVTy->isFloatingPointTy())
14715f757f3fSDimitry Andric       StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
14725f757f3fSDimitry Andric 
14735f757f3fSDimitry Andric     for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
14745f757f3fSDimitry Andric       Value *StartIdx = Builder.CreateBinOp(
14755f757f3fSDimitry Andric           AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
14765f757f3fSDimitry Andric       // The step returned by `createStepForVF` is a runtime-evaluated value
14775f757f3fSDimitry Andric       // when VF is scalable. Otherwise, it should be folded into a Constant.
14785f757f3fSDimitry Andric       assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
14795f757f3fSDimitry Andric              "Expected StartIdx to be folded to a constant when VF is not "
14805f757f3fSDimitry Andric              "scalable");
14815f757f3fSDimitry Andric       auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
14825f757f3fSDimitry Andric       auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
14835f757f3fSDimitry Andric       State.set(this, Add, VPIteration(Part, Lane));
14845f757f3fSDimitry Andric     }
14855f757f3fSDimitry Andric   }
14865f757f3fSDimitry Andric }
14875f757f3fSDimitry Andric 
148881ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
148981ad6265SDimitry Andric void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
149081ad6265SDimitry Andric                                   VPSlotTracker &SlotTracker) const {
149181ad6265SDimitry Andric   O << Indent;
149281ad6265SDimitry Andric   printAsOperand(O, SlotTracker);
14935f757f3fSDimitry Andric   O << " = SCALAR-STEPS ";
149481ad6265SDimitry Andric   printOperands(O, SlotTracker);
149581ad6265SDimitry Andric }
1496753f127fSDimitry Andric #endif
149781ad6265SDimitry Andric 
1498753f127fSDimitry Andric void VPWidenGEPRecipe::execute(VPTransformState &State) {
149906c3fb27SDimitry Andric   assert(State.VF.isVector() && "not widening");
1500753f127fSDimitry Andric   auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1501753f127fSDimitry Andric   // Construct a vector GEP by widening the operands of the scalar GEP as
1502753f127fSDimitry Andric   // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1503753f127fSDimitry Andric   // results in a vector of pointers when at least one operand of the GEP
1504753f127fSDimitry Andric   // is vector-typed. Thus, to keep the representation compact, we only use
1505753f127fSDimitry Andric   // vector-typed operands for loop-varying values.
1506753f127fSDimitry Andric 
150706c3fb27SDimitry Andric   if (areAllOperandsInvariant()) {
1508753f127fSDimitry Andric     // If we are vectorizing, but the GEP has only loop-invariant operands,
1509753f127fSDimitry Andric     // the GEP we build (by only using vector-typed operands for
1510753f127fSDimitry Andric     // loop-varying values) would be a scalar pointer. Thus, to ensure we
1511753f127fSDimitry Andric     // produce a vector of pointers, we need to either arbitrarily pick an
1512753f127fSDimitry Andric     // operand to broadcast, or broadcast a clone of the original GEP.
1513753f127fSDimitry Andric     // Here, we broadcast a clone of the original.
1514753f127fSDimitry Andric     //
1515753f127fSDimitry Andric     // TODO: If at some point we decide to scalarize instructions having
1516753f127fSDimitry Andric     //       loop-invariant operands, this special case will no longer be
1517753f127fSDimitry Andric     //       required. We would add the scalarization decision to
1518753f127fSDimitry Andric     //       collectLoopScalars() and teach getVectorValue() to broadcast
1519753f127fSDimitry Andric     //       the lane-zero scalar value.
152006c3fb27SDimitry Andric     SmallVector<Value *> Ops;
152106c3fb27SDimitry Andric     for (unsigned I = 0, E = getNumOperands(); I != E; I++)
152206c3fb27SDimitry Andric       Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));
152306c3fb27SDimitry Andric 
152406c3fb27SDimitry Andric     auto *NewGEP =
152506c3fb27SDimitry Andric         State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
152606c3fb27SDimitry Andric                                 ArrayRef(Ops).drop_front(), "", isInBounds());
1527753f127fSDimitry Andric     for (unsigned Part = 0; Part < State.UF; ++Part) {
152806c3fb27SDimitry Andric       Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1529753f127fSDimitry Andric       State.set(this, EntryPart, Part);
1530753f127fSDimitry Andric       State.addMetadata(EntryPart, GEP);
1531753f127fSDimitry Andric     }
1532753f127fSDimitry Andric   } else {
1533753f127fSDimitry Andric     // If the GEP has at least one loop-varying operand, we are sure to
1534753f127fSDimitry Andric     // produce a vector of pointers. But if we are only unrolling, we want
1535753f127fSDimitry Andric     // to produce a scalar GEP for each unroll part. Thus, the GEP we
1536753f127fSDimitry Andric     // produce with the code below will be scalar (if VF == 1) or vector
1537753f127fSDimitry Andric     // (otherwise). Note that for the unroll-only case, we still maintain
1538753f127fSDimitry Andric     // values in the vector mapping with initVector, as we do for other
1539753f127fSDimitry Andric     // instructions.
1540753f127fSDimitry Andric     for (unsigned Part = 0; Part < State.UF; ++Part) {
1541753f127fSDimitry Andric       // The pointer operand of the new GEP. If it's loop-invariant, we
1542753f127fSDimitry Andric       // won't broadcast it.
154306c3fb27SDimitry Andric       auto *Ptr = isPointerLoopInvariant()
1544753f127fSDimitry Andric                       ? State.get(getOperand(0), VPIteration(0, 0))
1545753f127fSDimitry Andric                       : State.get(getOperand(0), Part);
1546753f127fSDimitry Andric 
1547753f127fSDimitry Andric       // Collect all the indices for the new GEP. If any index is
1548753f127fSDimitry Andric       // loop-invariant, we won't broadcast it.
1549753f127fSDimitry Andric       SmallVector<Value *, 4> Indices;
1550753f127fSDimitry Andric       for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1551753f127fSDimitry Andric         VPValue *Operand = getOperand(I);
155206c3fb27SDimitry Andric         if (isIndexLoopInvariant(I - 1))
1553753f127fSDimitry Andric           Indices.push_back(State.get(Operand, VPIteration(0, 0)));
1554753f127fSDimitry Andric         else
1555753f127fSDimitry Andric           Indices.push_back(State.get(Operand, Part));
1556753f127fSDimitry Andric       }
1557753f127fSDimitry Andric 
1558753f127fSDimitry Andric       // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
1559753f127fSDimitry Andric       // but it should be a vector, otherwise.
1560753f127fSDimitry Andric       auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
156106c3fb27SDimitry Andric                                              Indices, "", isInBounds());
1562753f127fSDimitry Andric       assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
1563753f127fSDimitry Andric              "NewGEP is not a pointer vector");
1564753f127fSDimitry Andric       State.set(this, NewGEP, Part);
1565753f127fSDimitry Andric       State.addMetadata(NewGEP, GEP);
1566753f127fSDimitry Andric     }
1567753f127fSDimitry Andric   }
1568753f127fSDimitry Andric }
1569753f127fSDimitry Andric 
1570753f127fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
157181ad6265SDimitry Andric void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
157281ad6265SDimitry Andric                              VPSlotTracker &SlotTracker) const {
157381ad6265SDimitry Andric   O << Indent << "WIDEN-GEP ";
157406c3fb27SDimitry Andric   O << (isPointerLoopInvariant() ? "Inv" : "Var");
157506c3fb27SDimitry Andric   for (size_t I = 0; I < getNumOperands() - 1; ++I)
157606c3fb27SDimitry Andric     O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
157781ad6265SDimitry Andric 
157881ad6265SDimitry Andric   O << " ";
157981ad6265SDimitry Andric   printAsOperand(O, SlotTracker);
158081ad6265SDimitry Andric   O << " = getelementptr";
158106c3fb27SDimitry Andric   printFlags(O);
158281ad6265SDimitry Andric   printOperands(O, SlotTracker);
158381ad6265SDimitry Andric }
1584753f127fSDimitry Andric #endif
158581ad6265SDimitry Andric 
1586647cbc5dSDimitry Andric void VPVectorPointerRecipe ::execute(VPTransformState &State) {
1587647cbc5dSDimitry Andric   auto &Builder = State.Builder;
1588647cbc5dSDimitry Andric   State.setDebugLocFrom(getDebugLoc());
1589647cbc5dSDimitry Andric   for (unsigned Part = 0; Part < State.UF; ++Part) {
1590647cbc5dSDimitry Andric     // Calculate the pointer for the specific unroll-part.
1591647cbc5dSDimitry Andric     Value *PartPtr = nullptr;
1592647cbc5dSDimitry Andric     // Use i32 for the gep index type when the value is constant,
1593647cbc5dSDimitry Andric     // or query DataLayout for a more suitable index type otherwise.
1594647cbc5dSDimitry Andric     const DataLayout &DL =
1595*0fca6ea1SDimitry Andric         Builder.GetInsertBlock()->getDataLayout();
1596647cbc5dSDimitry Andric     Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
1597647cbc5dSDimitry Andric                         ? DL.getIndexType(IndexedTy->getPointerTo())
1598647cbc5dSDimitry Andric                         : Builder.getInt32Ty();
1599647cbc5dSDimitry Andric     Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
16001db9f3b2SDimitry Andric     bool InBounds = isInBounds();
1601647cbc5dSDimitry Andric     if (IsReverse) {
1602647cbc5dSDimitry Andric       // If the address is consecutive but reversed, then the
1603647cbc5dSDimitry Andric       // wide store needs to start at the last vector element.
1604647cbc5dSDimitry Andric       // RunTimeVF =  VScale * VF.getKnownMinValue()
1605647cbc5dSDimitry Andric       // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
1606647cbc5dSDimitry Andric       Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
1607647cbc5dSDimitry Andric       // NumElt = -Part * RunTimeVF
1608647cbc5dSDimitry Andric       Value *NumElt = Builder.CreateMul(
1609647cbc5dSDimitry Andric           ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
1610647cbc5dSDimitry Andric       // LastLane = 1 - RunTimeVF
1611647cbc5dSDimitry Andric       Value *LastLane =
1612647cbc5dSDimitry Andric           Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
1613647cbc5dSDimitry Andric       PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
1614647cbc5dSDimitry Andric       PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds);
1615647cbc5dSDimitry Andric     } else {
1616647cbc5dSDimitry Andric       Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
1617647cbc5dSDimitry Andric       PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
1618647cbc5dSDimitry Andric     }
1619647cbc5dSDimitry Andric 
1620*0fca6ea1SDimitry Andric     State.set(this, PartPtr, Part, /*IsScalar*/ true);
1621647cbc5dSDimitry Andric   }
1622647cbc5dSDimitry Andric }
1623647cbc5dSDimitry Andric 
1624647cbc5dSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1625647cbc5dSDimitry Andric void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
1626647cbc5dSDimitry Andric                                   VPSlotTracker &SlotTracker) const {
1627647cbc5dSDimitry Andric   O << Indent;
1628647cbc5dSDimitry Andric   printAsOperand(O, SlotTracker);
1629647cbc5dSDimitry Andric   O << " = vector-pointer ";
1630647cbc5dSDimitry Andric   if (IsReverse)
1631647cbc5dSDimitry Andric     O << "(reverse) ";
1632647cbc5dSDimitry Andric 
1633647cbc5dSDimitry Andric   printOperands(O, SlotTracker);
1634647cbc5dSDimitry Andric }
1635647cbc5dSDimitry Andric #endif
1636647cbc5dSDimitry Andric 
1637753f127fSDimitry Andric void VPBlendRecipe::execute(VPTransformState &State) {
16385f757f3fSDimitry Andric   State.setDebugLocFrom(getDebugLoc());
1639753f127fSDimitry Andric   // We know that all PHIs in non-header blocks are converted into
1640753f127fSDimitry Andric   // selects, so we don't have to worry about the insertion order and we
1641753f127fSDimitry Andric   // can just use the builder.
1642753f127fSDimitry Andric   // At this point we generate the predication tree. There may be
1643753f127fSDimitry Andric   // duplications since this is a simple recursive scan, but future
1644753f127fSDimitry Andric   // optimizations will clean it up.
1645753f127fSDimitry Andric 
1646753f127fSDimitry Andric   unsigned NumIncoming = getNumIncomingValues();
1647753f127fSDimitry Andric 
1648753f127fSDimitry Andric   // Generate a sequence of selects of the form:
1649753f127fSDimitry Andric   // SELECT(Mask3, In3,
1650753f127fSDimitry Andric   //        SELECT(Mask2, In2,
1651753f127fSDimitry Andric   //               SELECT(Mask1, In1,
1652753f127fSDimitry Andric   //                      In0)))
1653753f127fSDimitry Andric   // Note that Mask0 is never used: lanes for which no path reaches this phi and
1654753f127fSDimitry Andric   // are essentially undef are taken from In0.
1655753f127fSDimitry Andric  VectorParts Entry(State.UF);
1656*0fca6ea1SDimitry Andric  bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
1657753f127fSDimitry Andric  for (unsigned In = 0; In < NumIncoming; ++In) {
1658753f127fSDimitry Andric    for (unsigned Part = 0; Part < State.UF; ++Part) {
1659753f127fSDimitry Andric      // We might have single edge PHIs (blocks) - use an identity
1660753f127fSDimitry Andric      // 'select' for the first PHI operand.
1661*0fca6ea1SDimitry Andric      Value *In0 = State.get(getIncomingValue(In), Part, OnlyFirstLaneUsed);
1662753f127fSDimitry Andric      if (In == 0)
1663753f127fSDimitry Andric        Entry[Part] = In0; // Initialize with the first incoming value.
1664753f127fSDimitry Andric      else {
1665753f127fSDimitry Andric        // Select between the current value and the previous incoming edge
1666753f127fSDimitry Andric        // based on the incoming mask.
1667*0fca6ea1SDimitry Andric        Value *Cond = State.get(getMask(In), Part, OnlyFirstLaneUsed);
1668753f127fSDimitry Andric        Entry[Part] =
1669753f127fSDimitry Andric            State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
1670753f127fSDimitry Andric      }
1671753f127fSDimitry Andric    }
1672753f127fSDimitry Andric  }
1673753f127fSDimitry Andric   for (unsigned Part = 0; Part < State.UF; ++Part)
1674*0fca6ea1SDimitry Andric     State.set(this, Entry[Part], Part, OnlyFirstLaneUsed);
1675753f127fSDimitry Andric }
1676753f127fSDimitry Andric 
1677753f127fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
167881ad6265SDimitry Andric void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
167981ad6265SDimitry Andric                           VPSlotTracker &SlotTracker) const {
168081ad6265SDimitry Andric   O << Indent << "BLEND ";
16815f757f3fSDimitry Andric   printAsOperand(O, SlotTracker);
168281ad6265SDimitry Andric   O << " =";
168381ad6265SDimitry Andric   if (getNumIncomingValues() == 1) {
168481ad6265SDimitry Andric     // Not a User of any mask: not really blending, this is a
168581ad6265SDimitry Andric     // single-predecessor phi.
168681ad6265SDimitry Andric     O << " ";
168781ad6265SDimitry Andric     getIncomingValue(0)->printAsOperand(O, SlotTracker);
168881ad6265SDimitry Andric   } else {
168981ad6265SDimitry Andric     for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
169081ad6265SDimitry Andric       O << " ";
169181ad6265SDimitry Andric       getIncomingValue(I)->printAsOperand(O, SlotTracker);
1692*0fca6ea1SDimitry Andric       if (I == 0)
1693*0fca6ea1SDimitry Andric         continue;
169481ad6265SDimitry Andric       O << "/";
169581ad6265SDimitry Andric       getMask(I)->printAsOperand(O, SlotTracker);
169681ad6265SDimitry Andric     }
169781ad6265SDimitry Andric   }
169881ad6265SDimitry Andric }
1699*0fca6ea1SDimitry Andric #endif
170081ad6265SDimitry Andric 
1701*0fca6ea1SDimitry Andric void VPReductionRecipe::execute(VPTransformState &State) {
1702*0fca6ea1SDimitry Andric   assert(!State.Instance && "Reduction being replicated.");
1703*0fca6ea1SDimitry Andric   Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
1704*0fca6ea1SDimitry Andric   RecurKind Kind = RdxDesc.getRecurrenceKind();
1705*0fca6ea1SDimitry Andric   // Propagate the fast-math flags carried by the underlying instruction.
1706*0fca6ea1SDimitry Andric   IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
1707*0fca6ea1SDimitry Andric   State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
1708*0fca6ea1SDimitry Andric   for (unsigned Part = 0; Part < State.UF; ++Part) {
1709*0fca6ea1SDimitry Andric     Value *NewVecOp = State.get(getVecOp(), Part);
1710*0fca6ea1SDimitry Andric     if (VPValue *Cond = getCondOp()) {
1711*0fca6ea1SDimitry Andric       Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
1712*0fca6ea1SDimitry Andric       VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
1713*0fca6ea1SDimitry Andric       Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
1714*0fca6ea1SDimitry Andric       Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
1715*0fca6ea1SDimitry Andric                                                   RdxDesc.getFastMathFlags());
1716*0fca6ea1SDimitry Andric       if (State.VF.isVector()) {
1717*0fca6ea1SDimitry Andric         Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
1718*0fca6ea1SDimitry Andric       }
1719*0fca6ea1SDimitry Andric 
1720*0fca6ea1SDimitry Andric       Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
1721*0fca6ea1SDimitry Andric       NewVecOp = Select;
1722*0fca6ea1SDimitry Andric     }
1723*0fca6ea1SDimitry Andric     Value *NewRed;
1724*0fca6ea1SDimitry Andric     Value *NextInChain;
1725*0fca6ea1SDimitry Andric     if (IsOrdered) {
1726*0fca6ea1SDimitry Andric       if (State.VF.isVector())
1727*0fca6ea1SDimitry Andric         NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
1728*0fca6ea1SDimitry Andric                                         PrevInChain);
1729*0fca6ea1SDimitry Andric       else
1730*0fca6ea1SDimitry Andric         NewRed = State.Builder.CreateBinOp(
1731*0fca6ea1SDimitry Andric             (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
1732*0fca6ea1SDimitry Andric             NewVecOp);
1733*0fca6ea1SDimitry Andric       PrevInChain = NewRed;
1734*0fca6ea1SDimitry Andric     } else {
1735*0fca6ea1SDimitry Andric       PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
1736*0fca6ea1SDimitry Andric       NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
1737*0fca6ea1SDimitry Andric     }
1738*0fca6ea1SDimitry Andric     if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
1739*0fca6ea1SDimitry Andric       NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
1740*0fca6ea1SDimitry Andric                                    NewRed, PrevInChain);
1741*0fca6ea1SDimitry Andric     } else if (IsOrdered)
1742*0fca6ea1SDimitry Andric       NextInChain = NewRed;
1743*0fca6ea1SDimitry Andric     else
1744*0fca6ea1SDimitry Andric       NextInChain = State.Builder.CreateBinOp(
1745*0fca6ea1SDimitry Andric           (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
1746*0fca6ea1SDimitry Andric     State.set(this, NextInChain, Part, /*IsScalar*/ true);
1747*0fca6ea1SDimitry Andric   }
1748*0fca6ea1SDimitry Andric }
1749*0fca6ea1SDimitry Andric 
1750*0fca6ea1SDimitry Andric void VPReductionEVLRecipe::execute(VPTransformState &State) {
1751*0fca6ea1SDimitry Andric   assert(!State.Instance && "Reduction being replicated.");
1752*0fca6ea1SDimitry Andric   assert(State.UF == 1 &&
1753*0fca6ea1SDimitry Andric          "Expected only UF == 1 when vectorizing with explicit vector length.");
1754*0fca6ea1SDimitry Andric 
1755*0fca6ea1SDimitry Andric   auto &Builder = State.Builder;
1756*0fca6ea1SDimitry Andric   // Propagate the fast-math flags carried by the underlying instruction.
1757*0fca6ea1SDimitry Andric   IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
1758*0fca6ea1SDimitry Andric   const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor();
1759*0fca6ea1SDimitry Andric   Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
1760*0fca6ea1SDimitry Andric 
1761*0fca6ea1SDimitry Andric   RecurKind Kind = RdxDesc.getRecurrenceKind();
1762*0fca6ea1SDimitry Andric   Value *Prev = State.get(getChainOp(), 0, /*IsScalar*/ true);
1763*0fca6ea1SDimitry Andric   Value *VecOp = State.get(getVecOp(), 0);
1764*0fca6ea1SDimitry Andric   Value *EVL = State.get(getEVL(), VPIteration(0, 0));
1765*0fca6ea1SDimitry Andric 
1766*0fca6ea1SDimitry Andric   VectorBuilder VBuilder(Builder);
1767*0fca6ea1SDimitry Andric   VBuilder.setEVL(EVL);
1768*0fca6ea1SDimitry Andric   Value *Mask;
1769*0fca6ea1SDimitry Andric   // TODO: move the all-true mask generation into VectorBuilder.
1770*0fca6ea1SDimitry Andric   if (VPValue *CondOp = getCondOp())
1771*0fca6ea1SDimitry Andric     Mask = State.get(CondOp, 0);
1772*0fca6ea1SDimitry Andric   else
1773*0fca6ea1SDimitry Andric     Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
1774*0fca6ea1SDimitry Andric   VBuilder.setMask(Mask);
1775*0fca6ea1SDimitry Andric 
1776*0fca6ea1SDimitry Andric   Value *NewRed;
1777*0fca6ea1SDimitry Andric   if (isOrdered()) {
1778*0fca6ea1SDimitry Andric     NewRed = createOrderedReduction(VBuilder, RdxDesc, VecOp, Prev);
1779*0fca6ea1SDimitry Andric   } else {
1780*0fca6ea1SDimitry Andric     NewRed = createSimpleTargetReduction(VBuilder, VecOp, RdxDesc);
1781*0fca6ea1SDimitry Andric     if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
1782*0fca6ea1SDimitry Andric       NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);
1783*0fca6ea1SDimitry Andric     else
1784*0fca6ea1SDimitry Andric       NewRed = Builder.CreateBinOp(
1785*0fca6ea1SDimitry Andric           (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, Prev);
1786*0fca6ea1SDimitry Andric   }
1787*0fca6ea1SDimitry Andric   State.set(this, NewRed, 0, /*IsScalar*/ true);
1788*0fca6ea1SDimitry Andric }
1789*0fca6ea1SDimitry Andric 
1790*0fca6ea1SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
179181ad6265SDimitry Andric void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
179281ad6265SDimitry Andric                               VPSlotTracker &SlotTracker) const {
179381ad6265SDimitry Andric   O << Indent << "REDUCE ";
179481ad6265SDimitry Andric   printAsOperand(O, SlotTracker);
179581ad6265SDimitry Andric   O << " = ";
179681ad6265SDimitry Andric   getChainOp()->printAsOperand(O, SlotTracker);
179781ad6265SDimitry Andric   O << " +";
179881ad6265SDimitry Andric   if (isa<FPMathOperator>(getUnderlyingInstr()))
179981ad6265SDimitry Andric     O << getUnderlyingInstr()->getFastMathFlags();
18005f757f3fSDimitry Andric   O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
180181ad6265SDimitry Andric   getVecOp()->printAsOperand(O, SlotTracker);
1802*0fca6ea1SDimitry Andric   if (isConditional()) {
1803*0fca6ea1SDimitry Andric     O << ", ";
1804*0fca6ea1SDimitry Andric     getCondOp()->printAsOperand(O, SlotTracker);
1805*0fca6ea1SDimitry Andric   }
1806*0fca6ea1SDimitry Andric   O << ")";
1807*0fca6ea1SDimitry Andric   if (RdxDesc.IntermediateStore)
1808*0fca6ea1SDimitry Andric     O << " (with final reduction value stored in invariant address sank "
1809*0fca6ea1SDimitry Andric          "outside of loop)";
1810*0fca6ea1SDimitry Andric }
1811*0fca6ea1SDimitry Andric 
1812*0fca6ea1SDimitry Andric void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent,
1813*0fca6ea1SDimitry Andric                                  VPSlotTracker &SlotTracker) const {
1814*0fca6ea1SDimitry Andric   const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor();
1815*0fca6ea1SDimitry Andric   O << Indent << "REDUCE ";
1816*0fca6ea1SDimitry Andric   printAsOperand(O, SlotTracker);
1817*0fca6ea1SDimitry Andric   O << " = ";
1818*0fca6ea1SDimitry Andric   getChainOp()->printAsOperand(O, SlotTracker);
1819*0fca6ea1SDimitry Andric   O << " +";
1820*0fca6ea1SDimitry Andric   if (isa<FPMathOperator>(getUnderlyingInstr()))
1821*0fca6ea1SDimitry Andric     O << getUnderlyingInstr()->getFastMathFlags();
1822*0fca6ea1SDimitry Andric   O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
1823*0fca6ea1SDimitry Andric   getVecOp()->printAsOperand(O, SlotTracker);
1824*0fca6ea1SDimitry Andric   O << ", ";
1825*0fca6ea1SDimitry Andric   getEVL()->printAsOperand(O, SlotTracker);
1826*0fca6ea1SDimitry Andric   if (isConditional()) {
182781ad6265SDimitry Andric     O << ", ";
182881ad6265SDimitry Andric     getCondOp()->printAsOperand(O, SlotTracker);
182981ad6265SDimitry Andric   }
183081ad6265SDimitry Andric   O << ")";
18315f757f3fSDimitry Andric   if (RdxDesc.IntermediateStore)
183281ad6265SDimitry Andric     O << " (with final reduction value stored in invariant address sank "
183381ad6265SDimitry Andric          "outside of loop)";
183481ad6265SDimitry Andric }
183506c3fb27SDimitry Andric #endif
183681ad6265SDimitry Andric 
183706c3fb27SDimitry Andric bool VPReplicateRecipe::shouldPack() const {
183806c3fb27SDimitry Andric   // Find if the recipe is used by a widened recipe via an intervening
183906c3fb27SDimitry Andric   // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
184006c3fb27SDimitry Andric   return any_of(users(), [](const VPUser *U) {
184106c3fb27SDimitry Andric     if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
184206c3fb27SDimitry Andric       return any_of(PredR->users(), [PredR](const VPUser *U) {
184306c3fb27SDimitry Andric         return !U->usesScalars(PredR);
184406c3fb27SDimitry Andric       });
184506c3fb27SDimitry Andric     return false;
184606c3fb27SDimitry Andric   });
184706c3fb27SDimitry Andric }
184806c3fb27SDimitry Andric 
184906c3fb27SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
185081ad6265SDimitry Andric void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
185181ad6265SDimitry Andric                               VPSlotTracker &SlotTracker) const {
185281ad6265SDimitry Andric   O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
185381ad6265SDimitry Andric 
185481ad6265SDimitry Andric   if (!getUnderlyingInstr()->getType()->isVoidTy()) {
185581ad6265SDimitry Andric     printAsOperand(O, SlotTracker);
185681ad6265SDimitry Andric     O << " = ";
185781ad6265SDimitry Andric   }
185881ad6265SDimitry Andric   if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
185906c3fb27SDimitry Andric     O << "call";
186006c3fb27SDimitry Andric     printFlags(O);
186106c3fb27SDimitry Andric     O << "@" << CB->getCalledFunction()->getName() << "(";
186281ad6265SDimitry Andric     interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
186381ad6265SDimitry Andric                     O, [&O, &SlotTracker](VPValue *Op) {
186481ad6265SDimitry Andric                       Op->printAsOperand(O, SlotTracker);
186581ad6265SDimitry Andric                     });
186681ad6265SDimitry Andric     O << ")";
186781ad6265SDimitry Andric   } else {
186806c3fb27SDimitry Andric     O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode());
186906c3fb27SDimitry Andric     printFlags(O);
187081ad6265SDimitry Andric     printOperands(O, SlotTracker);
187181ad6265SDimitry Andric   }
187281ad6265SDimitry Andric 
187306c3fb27SDimitry Andric   if (shouldPack())
187481ad6265SDimitry Andric     O << " (S->V)";
187581ad6265SDimitry Andric }
1876753f127fSDimitry Andric #endif
187781ad6265SDimitry Andric 
1878*0fca6ea1SDimitry Andric /// Checks if \p C is uniform across all VFs and UFs. It is considered as such
1879*0fca6ea1SDimitry Andric /// if it is either defined outside the vector region or its operand is known to
1880*0fca6ea1SDimitry Andric /// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
1881*0fca6ea1SDimitry Andric /// TODO: Uniformity should be associated with a VPValue and there should be a
1882*0fca6ea1SDimitry Andric /// generic way to check.
1883*0fca6ea1SDimitry Andric static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) {
1884*0fca6ea1SDimitry Andric   return C->isDefinedOutsideVectorRegions() ||
1885*0fca6ea1SDimitry Andric          isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
1886*0fca6ea1SDimitry Andric          isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
1887*0fca6ea1SDimitry Andric }
1888*0fca6ea1SDimitry Andric 
1889*0fca6ea1SDimitry Andric Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
1890*0fca6ea1SDimitry Andric   assert(vputils::onlyFirstLaneUsed(this) &&
1891*0fca6ea1SDimitry Andric          "Codegen only implemented for first lane.");
1892*0fca6ea1SDimitry Andric   switch (Opcode) {
1893*0fca6ea1SDimitry Andric   case Instruction::SExt:
1894*0fca6ea1SDimitry Andric   case Instruction::ZExt:
1895*0fca6ea1SDimitry Andric   case Instruction::Trunc: {
1896*0fca6ea1SDimitry Andric     // Note: SExt/ZExt not used yet.
1897*0fca6ea1SDimitry Andric     Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
1898*0fca6ea1SDimitry Andric     return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
1899*0fca6ea1SDimitry Andric   }
1900*0fca6ea1SDimitry Andric   default:
1901*0fca6ea1SDimitry Andric     llvm_unreachable("opcode not implemented yet");
1902*0fca6ea1SDimitry Andric   }
1903*0fca6ea1SDimitry Andric }
1904*0fca6ea1SDimitry Andric 
1905*0fca6ea1SDimitry Andric void VPScalarCastRecipe ::execute(VPTransformState &State) {
1906*0fca6ea1SDimitry Andric   bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
1907*0fca6ea1SDimitry Andric   for (unsigned Part = 0; Part != State.UF; ++Part) {
1908*0fca6ea1SDimitry Andric     Value *Res;
1909*0fca6ea1SDimitry Andric     // Only generate a single instance, if the recipe is uniform across UFs and
1910*0fca6ea1SDimitry Andric     // VFs.
1911*0fca6ea1SDimitry Andric     if (Part > 0 && IsUniformAcrossVFsAndUFs)
1912*0fca6ea1SDimitry Andric       Res = State.get(this, VPIteration(0, 0));
1913*0fca6ea1SDimitry Andric     else
1914*0fca6ea1SDimitry Andric       Res = generate(State, Part);
1915*0fca6ea1SDimitry Andric     State.set(this, Res, VPIteration(Part, 0));
1916*0fca6ea1SDimitry Andric   }
1917*0fca6ea1SDimitry Andric }
1918*0fca6ea1SDimitry Andric 
1919*0fca6ea1SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1920*0fca6ea1SDimitry Andric void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
1921*0fca6ea1SDimitry Andric                                 VPSlotTracker &SlotTracker) const {
1922*0fca6ea1SDimitry Andric   O << Indent << "SCALAR-CAST ";
1923*0fca6ea1SDimitry Andric   printAsOperand(O, SlotTracker);
1924*0fca6ea1SDimitry Andric   O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1925*0fca6ea1SDimitry Andric   printOperands(O, SlotTracker);
1926*0fca6ea1SDimitry Andric   O << " to " << *ResultTy;
1927*0fca6ea1SDimitry Andric }
1928*0fca6ea1SDimitry Andric #endif
1929*0fca6ea1SDimitry Andric 
1930753f127fSDimitry Andric void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
1931753f127fSDimitry Andric   assert(State.Instance && "Branch on Mask works only on single instance.");
1932753f127fSDimitry Andric 
1933753f127fSDimitry Andric   unsigned Part = State.Instance->Part;
1934753f127fSDimitry Andric   unsigned Lane = State.Instance->Lane.getKnownLane();
1935753f127fSDimitry Andric 
1936753f127fSDimitry Andric   Value *ConditionBit = nullptr;
1937753f127fSDimitry Andric   VPValue *BlockInMask = getMask();
1938753f127fSDimitry Andric   if (BlockInMask) {
1939753f127fSDimitry Andric     ConditionBit = State.get(BlockInMask, Part);
1940753f127fSDimitry Andric     if (ConditionBit->getType()->isVectorTy())
1941753f127fSDimitry Andric       ConditionBit = State.Builder.CreateExtractElement(
1942753f127fSDimitry Andric           ConditionBit, State.Builder.getInt32(Lane));
1943753f127fSDimitry Andric   } else // Block in mask is all-one.
1944753f127fSDimitry Andric     ConditionBit = State.Builder.getTrue();
1945753f127fSDimitry Andric 
1946753f127fSDimitry Andric   // Replace the temporary unreachable terminator with a new conditional branch,
1947753f127fSDimitry Andric   // whose two destinations will be set later when they are created.
1948753f127fSDimitry Andric   auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
1949753f127fSDimitry Andric   assert(isa<UnreachableInst>(CurrentTerminator) &&
1950753f127fSDimitry Andric          "Expected to replace unreachable terminator with conditional branch.");
1951753f127fSDimitry Andric   auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
1952753f127fSDimitry Andric   CondBr->setSuccessor(0, nullptr);
1953753f127fSDimitry Andric   ReplaceInstWithInst(CurrentTerminator, CondBr);
1954753f127fSDimitry Andric }
1955753f127fSDimitry Andric 
1956fcaf7f86SDimitry Andric void VPPredInstPHIRecipe::execute(VPTransformState &State) {
1957fcaf7f86SDimitry Andric   assert(State.Instance && "Predicated instruction PHI works per instance.");
1958fcaf7f86SDimitry Andric   Instruction *ScalarPredInst =
1959fcaf7f86SDimitry Andric       cast<Instruction>(State.get(getOperand(0), *State.Instance));
1960fcaf7f86SDimitry Andric   BasicBlock *PredicatedBB = ScalarPredInst->getParent();
1961fcaf7f86SDimitry Andric   BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
1962fcaf7f86SDimitry Andric   assert(PredicatingBB && "Predicated block has no single predecessor.");
1963fcaf7f86SDimitry Andric   assert(isa<VPReplicateRecipe>(getOperand(0)) &&
1964fcaf7f86SDimitry Andric          "operand must be VPReplicateRecipe");
1965fcaf7f86SDimitry Andric 
1966fcaf7f86SDimitry Andric   // By current pack/unpack logic we need to generate only a single phi node: if
1967fcaf7f86SDimitry Andric   // a vector value for the predicated instruction exists at this point it means
1968fcaf7f86SDimitry Andric   // the instruction has vector users only, and a phi for the vector value is
1969fcaf7f86SDimitry Andric   // needed. In this case the recipe of the predicated instruction is marked to
1970fcaf7f86SDimitry Andric   // also do that packing, thereby "hoisting" the insert-element sequence.
1971fcaf7f86SDimitry Andric   // Otherwise, a phi node for the scalar value is needed.
1972fcaf7f86SDimitry Andric   unsigned Part = State.Instance->Part;
1973fcaf7f86SDimitry Andric   if (State.hasVectorValue(getOperand(0), Part)) {
1974fcaf7f86SDimitry Andric     Value *VectorValue = State.get(getOperand(0), Part);
1975fcaf7f86SDimitry Andric     InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
1976fcaf7f86SDimitry Andric     PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
1977fcaf7f86SDimitry Andric     VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
1978fcaf7f86SDimitry Andric     VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
1979fcaf7f86SDimitry Andric     if (State.hasVectorValue(this, Part))
1980fcaf7f86SDimitry Andric       State.reset(this, VPhi, Part);
1981fcaf7f86SDimitry Andric     else
1982fcaf7f86SDimitry Andric       State.set(this, VPhi, Part);
1983fcaf7f86SDimitry Andric     // NOTE: Currently we need to update the value of the operand, so the next
1984fcaf7f86SDimitry Andric     // predicated iteration inserts its generated value in the correct vector.
1985fcaf7f86SDimitry Andric     State.reset(getOperand(0), VPhi, Part);
1986fcaf7f86SDimitry Andric   } else {
1987fcaf7f86SDimitry Andric     Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
1988fcaf7f86SDimitry Andric     PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
1989fcaf7f86SDimitry Andric     Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
1990fcaf7f86SDimitry Andric                      PredicatingBB);
1991fcaf7f86SDimitry Andric     Phi->addIncoming(ScalarPredInst, PredicatedBB);
1992fcaf7f86SDimitry Andric     if (State.hasScalarValue(this, *State.Instance))
1993fcaf7f86SDimitry Andric       State.reset(this, Phi, *State.Instance);
1994fcaf7f86SDimitry Andric     else
1995fcaf7f86SDimitry Andric       State.set(this, Phi, *State.Instance);
1996fcaf7f86SDimitry Andric     // NOTE: Currently we need to update the value of the operand, so the next
1997fcaf7f86SDimitry Andric     // predicated iteration inserts its generated value in the correct vector.
1998fcaf7f86SDimitry Andric     State.reset(getOperand(0), Phi, *State.Instance);
1999fcaf7f86SDimitry Andric   }
2000fcaf7f86SDimitry Andric }
2001fcaf7f86SDimitry Andric 
2002753f127fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
200381ad6265SDimitry Andric void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
200481ad6265SDimitry Andric                                 VPSlotTracker &SlotTracker) const {
200581ad6265SDimitry Andric   O << Indent << "PHI-PREDICATED-INSTRUCTION ";
200681ad6265SDimitry Andric   printAsOperand(O, SlotTracker);
200781ad6265SDimitry Andric   O << " = ";
200881ad6265SDimitry Andric   printOperands(O, SlotTracker);
200981ad6265SDimitry Andric }
201081ad6265SDimitry Andric 
2011*0fca6ea1SDimitry Andric void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
201281ad6265SDimitry Andric                               VPSlotTracker &SlotTracker) const {
201381ad6265SDimitry Andric   O << Indent << "WIDEN ";
2014*0fca6ea1SDimitry Andric   printAsOperand(O, SlotTracker);
2015*0fca6ea1SDimitry Andric   O << " = load ";
201681ad6265SDimitry Andric   printOperands(O, SlotTracker);
201781ad6265SDimitry Andric }
2018*0fca6ea1SDimitry Andric 
2019*0fca6ea1SDimitry Andric void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
2020*0fca6ea1SDimitry Andric                                  VPSlotTracker &SlotTracker) const {
2021*0fca6ea1SDimitry Andric   O << Indent << "WIDEN ";
2022*0fca6ea1SDimitry Andric   printAsOperand(O, SlotTracker);
2023*0fca6ea1SDimitry Andric   O << " = vp.load ";
2024*0fca6ea1SDimitry Andric   printOperands(O, SlotTracker);
2025*0fca6ea1SDimitry Andric }
2026*0fca6ea1SDimitry Andric 
2027*0fca6ea1SDimitry Andric void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
2028*0fca6ea1SDimitry Andric                                VPSlotTracker &SlotTracker) const {
2029*0fca6ea1SDimitry Andric   O << Indent << "WIDEN store ";
2030*0fca6ea1SDimitry Andric   printOperands(O, SlotTracker);
2031*0fca6ea1SDimitry Andric }
2032*0fca6ea1SDimitry Andric 
2033*0fca6ea1SDimitry Andric void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent,
2034*0fca6ea1SDimitry Andric                                   VPSlotTracker &SlotTracker) const {
2035*0fca6ea1SDimitry Andric   O << Indent << "WIDEN vp.store ";
2036*0fca6ea1SDimitry Andric   printOperands(O, SlotTracker);
2037*0fca6ea1SDimitry Andric }
2038*0fca6ea1SDimitry Andric #endif
2039*0fca6ea1SDimitry Andric 
2040*0fca6ea1SDimitry Andric static Value *createBitOrPointerCast(IRBuilderBase &Builder, Value *V,
2041*0fca6ea1SDimitry Andric                                      VectorType *DstVTy, const DataLayout &DL) {
2042*0fca6ea1SDimitry Andric   // Verify that V is a vector type with same number of elements as DstVTy.
2043*0fca6ea1SDimitry Andric   auto VF = DstVTy->getElementCount();
2044*0fca6ea1SDimitry Andric   auto *SrcVecTy = cast<VectorType>(V->getType());
2045*0fca6ea1SDimitry Andric   assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
2046*0fca6ea1SDimitry Andric   Type *SrcElemTy = SrcVecTy->getElementType();
2047*0fca6ea1SDimitry Andric   Type *DstElemTy = DstVTy->getElementType();
2048*0fca6ea1SDimitry Andric   assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
2049*0fca6ea1SDimitry Andric          "Vector elements must have same size");
2050*0fca6ea1SDimitry Andric 
2051*0fca6ea1SDimitry Andric   // Do a direct cast if element types are castable.
2052*0fca6ea1SDimitry Andric   if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
2053*0fca6ea1SDimitry Andric     return Builder.CreateBitOrPointerCast(V, DstVTy);
2054*0fca6ea1SDimitry Andric   }
2055*0fca6ea1SDimitry Andric   // V cannot be directly casted to desired vector type.
2056*0fca6ea1SDimitry Andric   // May happen when V is a floating point vector but DstVTy is a vector of
2057*0fca6ea1SDimitry Andric   // pointers or vice-versa. Handle this using a two-step bitcast using an
2058*0fca6ea1SDimitry Andric   // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.
2059*0fca6ea1SDimitry Andric   assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
2060*0fca6ea1SDimitry Andric          "Only one type should be a pointer type");
2061*0fca6ea1SDimitry Andric   assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
2062*0fca6ea1SDimitry Andric          "Only one type should be a floating point type");
2063*0fca6ea1SDimitry Andric   Type *IntTy =
2064*0fca6ea1SDimitry Andric       IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
2065*0fca6ea1SDimitry Andric   auto *VecIntTy = VectorType::get(IntTy, VF);
2066*0fca6ea1SDimitry Andric   Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
2067*0fca6ea1SDimitry Andric   return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
2068*0fca6ea1SDimitry Andric }
2069*0fca6ea1SDimitry Andric 
2070*0fca6ea1SDimitry Andric /// Return a vector containing interleaved elements from multiple
2071*0fca6ea1SDimitry Andric /// smaller input vectors.
2072*0fca6ea1SDimitry Andric static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
2073*0fca6ea1SDimitry Andric                                 const Twine &Name) {
2074*0fca6ea1SDimitry Andric   unsigned Factor = Vals.size();
2075*0fca6ea1SDimitry Andric   assert(Factor > 1 && "Tried to interleave invalid number of vectors");
2076*0fca6ea1SDimitry Andric 
2077*0fca6ea1SDimitry Andric   VectorType *VecTy = cast<VectorType>(Vals[0]->getType());
2078*0fca6ea1SDimitry Andric #ifndef NDEBUG
2079*0fca6ea1SDimitry Andric   for (Value *Val : Vals)
2080*0fca6ea1SDimitry Andric     assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
2081*0fca6ea1SDimitry Andric #endif
2082*0fca6ea1SDimitry Andric 
2083*0fca6ea1SDimitry Andric   // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
2084*0fca6ea1SDimitry Andric   // must use intrinsics to interleave.
2085*0fca6ea1SDimitry Andric   if (VecTy->isScalableTy()) {
2086*0fca6ea1SDimitry Andric     VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy);
2087*0fca6ea1SDimitry Andric     return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
2088*0fca6ea1SDimitry Andric                                    Vals,
2089*0fca6ea1SDimitry Andric                                    /*FMFSource=*/nullptr, Name);
2090*0fca6ea1SDimitry Andric   }
2091*0fca6ea1SDimitry Andric 
2092*0fca6ea1SDimitry Andric   // Fixed length. Start by concatenating all vectors into a wide vector.
2093*0fca6ea1SDimitry Andric   Value *WideVec = concatenateVectors(Builder, Vals);
2094*0fca6ea1SDimitry Andric 
2095*0fca6ea1SDimitry Andric   // Interleave the elements into the wide vector.
2096*0fca6ea1SDimitry Andric   const unsigned NumElts = VecTy->getElementCount().getFixedValue();
2097*0fca6ea1SDimitry Andric   return Builder.CreateShuffleVector(
2098*0fca6ea1SDimitry Andric       WideVec, createInterleaveMask(NumElts, Factor), Name);
2099*0fca6ea1SDimitry Andric }
2100*0fca6ea1SDimitry Andric 
2101*0fca6ea1SDimitry Andric // Try to vectorize the interleave group that \p Instr belongs to.
2102*0fca6ea1SDimitry Andric //
2103*0fca6ea1SDimitry Andric // E.g. Translate following interleaved load group (factor = 3):
2104*0fca6ea1SDimitry Andric //   for (i = 0; i < N; i+=3) {
2105*0fca6ea1SDimitry Andric //     R = Pic[i];             // Member of index 0
2106*0fca6ea1SDimitry Andric //     G = Pic[i+1];           // Member of index 1
2107*0fca6ea1SDimitry Andric //     B = Pic[i+2];           // Member of index 2
2108*0fca6ea1SDimitry Andric //     ... // do something to R, G, B
2109*0fca6ea1SDimitry Andric //   }
2110*0fca6ea1SDimitry Andric // To:
2111*0fca6ea1SDimitry Andric //   %wide.vec = load <12 x i32>                       ; Read 4 tuples of R,G,B
2112*0fca6ea1SDimitry Andric //   %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9>   ; R elements
2113*0fca6ea1SDimitry Andric //   %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10>  ; G elements
2114*0fca6ea1SDimitry Andric //   %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11>  ; B elements
2115*0fca6ea1SDimitry Andric //
2116*0fca6ea1SDimitry Andric // Or translate following interleaved store group (factor = 3):
2117*0fca6ea1SDimitry Andric //   for (i = 0; i < N; i+=3) {
2118*0fca6ea1SDimitry Andric //     ... do something to R, G, B
2119*0fca6ea1SDimitry Andric //     Pic[i]   = R;           // Member of index 0
2120*0fca6ea1SDimitry Andric //     Pic[i+1] = G;           // Member of index 1
2121*0fca6ea1SDimitry Andric //     Pic[i+2] = B;           // Member of index 2
2122*0fca6ea1SDimitry Andric //   }
2123*0fca6ea1SDimitry Andric // To:
2124*0fca6ea1SDimitry Andric //   %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
2125*0fca6ea1SDimitry Andric //   %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
2126*0fca6ea1SDimitry Andric //   %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
2127*0fca6ea1SDimitry Andric //        <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>    ; Interleave R,G,B elements
2128*0fca6ea1SDimitry Andric //   store <12 x i32> %interleaved.vec              ; Write 4 tuples of R,G,B
2129*0fca6ea1SDimitry Andric void VPInterleaveRecipe::execute(VPTransformState &State) {
2130*0fca6ea1SDimitry Andric   assert(!State.Instance && "Interleave group being replicated.");
2131*0fca6ea1SDimitry Andric   const InterleaveGroup<Instruction> *Group = IG;
2132*0fca6ea1SDimitry Andric   Instruction *Instr = Group->getInsertPos();
2133*0fca6ea1SDimitry Andric 
2134*0fca6ea1SDimitry Andric   // Prepare for the vector type of the interleaved load/store.
2135*0fca6ea1SDimitry Andric   Type *ScalarTy = getLoadStoreType(Instr);
2136*0fca6ea1SDimitry Andric   unsigned InterleaveFactor = Group->getFactor();
2137*0fca6ea1SDimitry Andric   auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
2138*0fca6ea1SDimitry Andric 
2139*0fca6ea1SDimitry Andric   // Prepare for the new pointers.
2140*0fca6ea1SDimitry Andric   SmallVector<Value *, 2> AddrParts;
2141*0fca6ea1SDimitry Andric   unsigned Index = Group->getIndex(Instr);
2142*0fca6ea1SDimitry Andric 
2143*0fca6ea1SDimitry Andric   // TODO: extend the masked interleaved-group support to reversed access.
2144*0fca6ea1SDimitry Andric   VPValue *BlockInMask = getMask();
2145*0fca6ea1SDimitry Andric   assert((!BlockInMask || !Group->isReverse()) &&
2146*0fca6ea1SDimitry Andric          "Reversed masked interleave-group not supported.");
2147*0fca6ea1SDimitry Andric 
2148*0fca6ea1SDimitry Andric   Value *Idx;
2149*0fca6ea1SDimitry Andric   // If the group is reverse, adjust the index to refer to the last vector lane
2150*0fca6ea1SDimitry Andric   // instead of the first. We adjust the index from the first vector lane,
2151*0fca6ea1SDimitry Andric   // rather than directly getting the pointer for lane VF - 1, because the
2152*0fca6ea1SDimitry Andric   // pointer operand of the interleaved access is supposed to be uniform. For
2153*0fca6ea1SDimitry Andric   // uniform instructions, we're only required to generate a value for the
2154*0fca6ea1SDimitry Andric   // first vector lane in each unroll iteration.
2155*0fca6ea1SDimitry Andric   if (Group->isReverse()) {
2156*0fca6ea1SDimitry Andric     Value *RuntimeVF =
2157*0fca6ea1SDimitry Andric         getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
2158*0fca6ea1SDimitry Andric     Idx = State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
2159*0fca6ea1SDimitry Andric     Idx = State.Builder.CreateMul(Idx,
2160*0fca6ea1SDimitry Andric                                   State.Builder.getInt32(Group->getFactor()));
2161*0fca6ea1SDimitry Andric     Idx = State.Builder.CreateAdd(Idx, State.Builder.getInt32(Index));
2162*0fca6ea1SDimitry Andric     Idx = State.Builder.CreateNeg(Idx);
2163*0fca6ea1SDimitry Andric   } else
2164*0fca6ea1SDimitry Andric     Idx = State.Builder.getInt32(-Index);
2165*0fca6ea1SDimitry Andric 
2166*0fca6ea1SDimitry Andric   VPValue *Addr = getAddr();
2167*0fca6ea1SDimitry Andric   for (unsigned Part = 0; Part < State.UF; Part++) {
2168*0fca6ea1SDimitry Andric     Value *AddrPart = State.get(Addr, VPIteration(Part, 0));
2169*0fca6ea1SDimitry Andric     if (auto *I = dyn_cast<Instruction>(AddrPart))
2170*0fca6ea1SDimitry Andric       State.setDebugLocFrom(I->getDebugLoc());
2171*0fca6ea1SDimitry Andric 
2172*0fca6ea1SDimitry Andric     // Notice current instruction could be any index. Need to adjust the address
2173*0fca6ea1SDimitry Andric     // to the member of index 0.
2174*0fca6ea1SDimitry Andric     //
2175*0fca6ea1SDimitry Andric     // E.g.  a = A[i+1];     // Member of index 1 (Current instruction)
2176*0fca6ea1SDimitry Andric     //       b = A[i];       // Member of index 0
2177*0fca6ea1SDimitry Andric     // Current pointer is pointed to A[i+1], adjust it to A[i].
2178*0fca6ea1SDimitry Andric     //
2179*0fca6ea1SDimitry Andric     // E.g.  A[i+1] = a;     // Member of index 1
2180*0fca6ea1SDimitry Andric     //       A[i]   = b;     // Member of index 0
2181*0fca6ea1SDimitry Andric     //       A[i+2] = c;     // Member of index 2 (Current instruction)
2182*0fca6ea1SDimitry Andric     // Current pointer is pointed to A[i+2], adjust it to A[i].
2183*0fca6ea1SDimitry Andric 
2184*0fca6ea1SDimitry Andric     bool InBounds = false;
2185*0fca6ea1SDimitry Andric     if (auto *gep = dyn_cast<GetElementPtrInst>(AddrPart->stripPointerCasts()))
2186*0fca6ea1SDimitry Andric       InBounds = gep->isInBounds();
2187*0fca6ea1SDimitry Andric     AddrPart = State.Builder.CreateGEP(ScalarTy, AddrPart, Idx, "", InBounds);
2188*0fca6ea1SDimitry Andric     AddrParts.push_back(AddrPart);
2189*0fca6ea1SDimitry Andric   }
2190*0fca6ea1SDimitry Andric 
2191*0fca6ea1SDimitry Andric   State.setDebugLocFrom(Instr->getDebugLoc());
2192*0fca6ea1SDimitry Andric   Value *PoisonVec = PoisonValue::get(VecTy);
2193*0fca6ea1SDimitry Andric 
2194*0fca6ea1SDimitry Andric   auto CreateGroupMask = [&BlockInMask, &State, &InterleaveFactor](
2195*0fca6ea1SDimitry Andric                              unsigned Part, Value *MaskForGaps) -> Value * {
2196*0fca6ea1SDimitry Andric     if (State.VF.isScalable()) {
2197*0fca6ea1SDimitry Andric       assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
2198*0fca6ea1SDimitry Andric       assert(InterleaveFactor == 2 &&
2199*0fca6ea1SDimitry Andric              "Unsupported deinterleave factor for scalable vectors");
2200*0fca6ea1SDimitry Andric       auto *BlockInMaskPart = State.get(BlockInMask, Part);
2201*0fca6ea1SDimitry Andric       SmallVector<Value *, 2> Ops = {BlockInMaskPart, BlockInMaskPart};
2202*0fca6ea1SDimitry Andric       auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
2203*0fca6ea1SDimitry Andric                                      State.VF.getKnownMinValue() * 2, true);
2204*0fca6ea1SDimitry Andric       return State.Builder.CreateIntrinsic(
2205*0fca6ea1SDimitry Andric           MaskTy, Intrinsic::vector_interleave2, Ops,
2206*0fca6ea1SDimitry Andric           /*FMFSource=*/nullptr, "interleaved.mask");
2207*0fca6ea1SDimitry Andric     }
2208*0fca6ea1SDimitry Andric 
2209*0fca6ea1SDimitry Andric     if (!BlockInMask)
2210*0fca6ea1SDimitry Andric       return MaskForGaps;
2211*0fca6ea1SDimitry Andric 
2212*0fca6ea1SDimitry Andric     Value *BlockInMaskPart = State.get(BlockInMask, Part);
2213*0fca6ea1SDimitry Andric     Value *ShuffledMask = State.Builder.CreateShuffleVector(
2214*0fca6ea1SDimitry Andric         BlockInMaskPart,
2215*0fca6ea1SDimitry Andric         createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
2216*0fca6ea1SDimitry Andric         "interleaved.mask");
2217*0fca6ea1SDimitry Andric     return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
2218*0fca6ea1SDimitry Andric                                                    ShuffledMask, MaskForGaps)
2219*0fca6ea1SDimitry Andric                        : ShuffledMask;
2220*0fca6ea1SDimitry Andric   };
2221*0fca6ea1SDimitry Andric 
2222*0fca6ea1SDimitry Andric   const DataLayout &DL = Instr->getDataLayout();
2223*0fca6ea1SDimitry Andric   // Vectorize the interleaved load group.
2224*0fca6ea1SDimitry Andric   if (isa<LoadInst>(Instr)) {
2225*0fca6ea1SDimitry Andric     Value *MaskForGaps = nullptr;
2226*0fca6ea1SDimitry Andric     if (NeedsMaskForGaps) {
2227*0fca6ea1SDimitry Andric       MaskForGaps = createBitMaskForGaps(State.Builder,
2228*0fca6ea1SDimitry Andric                                          State.VF.getKnownMinValue(), *Group);
2229*0fca6ea1SDimitry Andric       assert(MaskForGaps && "Mask for Gaps is required but it is null");
2230*0fca6ea1SDimitry Andric     }
2231*0fca6ea1SDimitry Andric 
2232*0fca6ea1SDimitry Andric     // For each unroll part, create a wide load for the group.
2233*0fca6ea1SDimitry Andric     SmallVector<Value *, 2> NewLoads;
2234*0fca6ea1SDimitry Andric     for (unsigned Part = 0; Part < State.UF; Part++) {
2235*0fca6ea1SDimitry Andric       Instruction *NewLoad;
2236*0fca6ea1SDimitry Andric       if (BlockInMask || MaskForGaps) {
2237*0fca6ea1SDimitry Andric         Value *GroupMask = CreateGroupMask(Part, MaskForGaps);
2238*0fca6ea1SDimitry Andric         NewLoad = State.Builder.CreateMaskedLoad(VecTy, AddrParts[Part],
2239*0fca6ea1SDimitry Andric                                                  Group->getAlign(), GroupMask,
2240*0fca6ea1SDimitry Andric                                                  PoisonVec, "wide.masked.vec");
2241*0fca6ea1SDimitry Andric       } else
2242*0fca6ea1SDimitry Andric         NewLoad = State.Builder.CreateAlignedLoad(
2243*0fca6ea1SDimitry Andric             VecTy, AddrParts[Part], Group->getAlign(), "wide.vec");
2244*0fca6ea1SDimitry Andric       Group->addMetadata(NewLoad);
2245*0fca6ea1SDimitry Andric       NewLoads.push_back(NewLoad);
2246*0fca6ea1SDimitry Andric     }
2247*0fca6ea1SDimitry Andric 
2248*0fca6ea1SDimitry Andric     ArrayRef<VPValue *> VPDefs = definedValues();
2249*0fca6ea1SDimitry Andric     const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
2250*0fca6ea1SDimitry Andric     if (VecTy->isScalableTy()) {
2251*0fca6ea1SDimitry Andric       assert(InterleaveFactor == 2 &&
2252*0fca6ea1SDimitry Andric              "Unsupported deinterleave factor for scalable vectors");
2253*0fca6ea1SDimitry Andric 
2254*0fca6ea1SDimitry Andric       for (unsigned Part = 0; Part < State.UF; ++Part) {
2255*0fca6ea1SDimitry Andric         // Scalable vectors cannot use arbitrary shufflevectors (only splats),
2256*0fca6ea1SDimitry Andric         // so must use intrinsics to deinterleave.
2257*0fca6ea1SDimitry Andric         Value *DI = State.Builder.CreateIntrinsic(
2258*0fca6ea1SDimitry Andric             Intrinsic::vector_deinterleave2, VecTy, NewLoads[Part],
2259*0fca6ea1SDimitry Andric             /*FMFSource=*/nullptr, "strided.vec");
2260*0fca6ea1SDimitry Andric         unsigned J = 0;
2261*0fca6ea1SDimitry Andric         for (unsigned I = 0; I < InterleaveFactor; ++I) {
2262*0fca6ea1SDimitry Andric           Instruction *Member = Group->getMember(I);
2263*0fca6ea1SDimitry Andric 
2264*0fca6ea1SDimitry Andric           if (!Member)
2265*0fca6ea1SDimitry Andric             continue;
2266*0fca6ea1SDimitry Andric 
2267*0fca6ea1SDimitry Andric           Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
2268*0fca6ea1SDimitry Andric           // If this member has different type, cast the result type.
2269*0fca6ea1SDimitry Andric           if (Member->getType() != ScalarTy) {
2270*0fca6ea1SDimitry Andric             VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
2271*0fca6ea1SDimitry Andric             StridedVec =
2272*0fca6ea1SDimitry Andric                 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
2273*0fca6ea1SDimitry Andric           }
2274*0fca6ea1SDimitry Andric 
2275*0fca6ea1SDimitry Andric           if (Group->isReverse())
2276*0fca6ea1SDimitry Andric             StridedVec =
2277*0fca6ea1SDimitry Andric                 State.Builder.CreateVectorReverse(StridedVec, "reverse");
2278*0fca6ea1SDimitry Andric 
2279*0fca6ea1SDimitry Andric           State.set(VPDefs[J], StridedVec, Part);
2280*0fca6ea1SDimitry Andric           ++J;
2281*0fca6ea1SDimitry Andric         }
2282*0fca6ea1SDimitry Andric       }
2283*0fca6ea1SDimitry Andric 
2284*0fca6ea1SDimitry Andric       return;
2285*0fca6ea1SDimitry Andric     }
2286*0fca6ea1SDimitry Andric 
2287*0fca6ea1SDimitry Andric     // For each member in the group, shuffle out the appropriate data from the
2288*0fca6ea1SDimitry Andric     // wide loads.
2289*0fca6ea1SDimitry Andric     unsigned J = 0;
2290*0fca6ea1SDimitry Andric     for (unsigned I = 0; I < InterleaveFactor; ++I) {
2291*0fca6ea1SDimitry Andric       Instruction *Member = Group->getMember(I);
2292*0fca6ea1SDimitry Andric 
2293*0fca6ea1SDimitry Andric       // Skip the gaps in the group.
2294*0fca6ea1SDimitry Andric       if (!Member)
2295*0fca6ea1SDimitry Andric         continue;
2296*0fca6ea1SDimitry Andric 
2297*0fca6ea1SDimitry Andric       auto StrideMask =
2298*0fca6ea1SDimitry Andric           createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
2299*0fca6ea1SDimitry Andric       for (unsigned Part = 0; Part < State.UF; Part++) {
2300*0fca6ea1SDimitry Andric         Value *StridedVec = State.Builder.CreateShuffleVector(
2301*0fca6ea1SDimitry Andric             NewLoads[Part], StrideMask, "strided.vec");
2302*0fca6ea1SDimitry Andric 
2303*0fca6ea1SDimitry Andric         // If this member has different type, cast the result type.
2304*0fca6ea1SDimitry Andric         if (Member->getType() != ScalarTy) {
2305*0fca6ea1SDimitry Andric           assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
2306*0fca6ea1SDimitry Andric           VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
2307*0fca6ea1SDimitry Andric           StridedVec =
2308*0fca6ea1SDimitry Andric               createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
2309*0fca6ea1SDimitry Andric         }
2310*0fca6ea1SDimitry Andric 
2311*0fca6ea1SDimitry Andric         if (Group->isReverse())
2312*0fca6ea1SDimitry Andric           StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
2313*0fca6ea1SDimitry Andric 
2314*0fca6ea1SDimitry Andric         State.set(VPDefs[J], StridedVec, Part);
2315*0fca6ea1SDimitry Andric       }
2316*0fca6ea1SDimitry Andric       ++J;
2317*0fca6ea1SDimitry Andric     }
2318*0fca6ea1SDimitry Andric     return;
2319*0fca6ea1SDimitry Andric   }
2320*0fca6ea1SDimitry Andric 
2321*0fca6ea1SDimitry Andric   // The sub vector type for current instruction.
2322*0fca6ea1SDimitry Andric   auto *SubVT = VectorType::get(ScalarTy, State.VF);
2323*0fca6ea1SDimitry Andric 
2324*0fca6ea1SDimitry Andric   // Vectorize the interleaved store group.
2325*0fca6ea1SDimitry Andric   Value *MaskForGaps =
2326*0fca6ea1SDimitry Andric       createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
2327*0fca6ea1SDimitry Andric   assert((!MaskForGaps || !State.VF.isScalable()) &&
2328*0fca6ea1SDimitry Andric          "masking gaps for scalable vectors is not yet supported.");
2329*0fca6ea1SDimitry Andric   ArrayRef<VPValue *> StoredValues = getStoredValues();
2330*0fca6ea1SDimitry Andric   for (unsigned Part = 0; Part < State.UF; Part++) {
2331*0fca6ea1SDimitry Andric     // Collect the stored vector from each member.
2332*0fca6ea1SDimitry Andric     SmallVector<Value *, 4> StoredVecs;
2333*0fca6ea1SDimitry Andric     unsigned StoredIdx = 0;
2334*0fca6ea1SDimitry Andric     for (unsigned i = 0; i < InterleaveFactor; i++) {
2335*0fca6ea1SDimitry Andric       assert((Group->getMember(i) || MaskForGaps) &&
2336*0fca6ea1SDimitry Andric              "Fail to get a member from an interleaved store group");
2337*0fca6ea1SDimitry Andric       Instruction *Member = Group->getMember(i);
2338*0fca6ea1SDimitry Andric 
2339*0fca6ea1SDimitry Andric       // Skip the gaps in the group.
2340*0fca6ea1SDimitry Andric       if (!Member) {
2341*0fca6ea1SDimitry Andric         Value *Undef = PoisonValue::get(SubVT);
2342*0fca6ea1SDimitry Andric         StoredVecs.push_back(Undef);
2343*0fca6ea1SDimitry Andric         continue;
2344*0fca6ea1SDimitry Andric       }
2345*0fca6ea1SDimitry Andric 
2346*0fca6ea1SDimitry Andric       Value *StoredVec = State.get(StoredValues[StoredIdx], Part);
2347*0fca6ea1SDimitry Andric       ++StoredIdx;
2348*0fca6ea1SDimitry Andric 
2349*0fca6ea1SDimitry Andric       if (Group->isReverse())
2350*0fca6ea1SDimitry Andric         StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
2351*0fca6ea1SDimitry Andric 
2352*0fca6ea1SDimitry Andric       // If this member has different type, cast it to a unified type.
2353*0fca6ea1SDimitry Andric 
2354*0fca6ea1SDimitry Andric       if (StoredVec->getType() != SubVT)
2355*0fca6ea1SDimitry Andric         StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
2356*0fca6ea1SDimitry Andric 
2357*0fca6ea1SDimitry Andric       StoredVecs.push_back(StoredVec);
2358*0fca6ea1SDimitry Andric     }
2359*0fca6ea1SDimitry Andric 
2360*0fca6ea1SDimitry Andric     // Interleave all the smaller vectors into one wider vector.
2361*0fca6ea1SDimitry Andric     Value *IVec =
2362*0fca6ea1SDimitry Andric         interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
2363*0fca6ea1SDimitry Andric     Instruction *NewStoreInstr;
2364*0fca6ea1SDimitry Andric     if (BlockInMask || MaskForGaps) {
2365*0fca6ea1SDimitry Andric       Value *GroupMask = CreateGroupMask(Part, MaskForGaps);
2366*0fca6ea1SDimitry Andric       NewStoreInstr = State.Builder.CreateMaskedStore(
2367*0fca6ea1SDimitry Andric           IVec, AddrParts[Part], Group->getAlign(), GroupMask);
2368*0fca6ea1SDimitry Andric     } else
2369*0fca6ea1SDimitry Andric       NewStoreInstr = State.Builder.CreateAlignedStore(IVec, AddrParts[Part],
2370*0fca6ea1SDimitry Andric                                                        Group->getAlign());
2371*0fca6ea1SDimitry Andric 
2372*0fca6ea1SDimitry Andric     Group->addMetadata(NewStoreInstr);
2373*0fca6ea1SDimitry Andric   }
2374*0fca6ea1SDimitry Andric }
2375*0fca6ea1SDimitry Andric 
2376*0fca6ea1SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2377*0fca6ea1SDimitry Andric void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
2378*0fca6ea1SDimitry Andric                                VPSlotTracker &SlotTracker) const {
2379*0fca6ea1SDimitry Andric   O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
2380*0fca6ea1SDimitry Andric   IG->getInsertPos()->printAsOperand(O, false);
2381*0fca6ea1SDimitry Andric   O << ", ";
2382*0fca6ea1SDimitry Andric   getAddr()->printAsOperand(O, SlotTracker);
2383*0fca6ea1SDimitry Andric   VPValue *Mask = getMask();
2384*0fca6ea1SDimitry Andric   if (Mask) {
2385*0fca6ea1SDimitry Andric     O << ", ";
2386*0fca6ea1SDimitry Andric     Mask->printAsOperand(O, SlotTracker);
2387*0fca6ea1SDimitry Andric   }
2388*0fca6ea1SDimitry Andric 
2389*0fca6ea1SDimitry Andric   unsigned OpIdx = 0;
2390*0fca6ea1SDimitry Andric   for (unsigned i = 0; i < IG->getFactor(); ++i) {
2391*0fca6ea1SDimitry Andric     if (!IG->getMember(i))
2392*0fca6ea1SDimitry Andric       continue;
2393*0fca6ea1SDimitry Andric     if (getNumStoreOperands() > 0) {
2394*0fca6ea1SDimitry Andric       O << "\n" << Indent << "  store ";
2395*0fca6ea1SDimitry Andric       getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);
2396*0fca6ea1SDimitry Andric       O << " to index " << i;
2397*0fca6ea1SDimitry Andric     } else {
2398*0fca6ea1SDimitry Andric       O << "\n" << Indent << "  ";
2399*0fca6ea1SDimitry Andric       getVPValue(OpIdx)->printAsOperand(O, SlotTracker);
2400*0fca6ea1SDimitry Andric       O << " = load from index " << i;
2401*0fca6ea1SDimitry Andric     }
2402*0fca6ea1SDimitry Andric     ++OpIdx;
2403*0fca6ea1SDimitry Andric   }
2404*0fca6ea1SDimitry Andric }
240581ad6265SDimitry Andric #endif
240681ad6265SDimitry Andric 
240781ad6265SDimitry Andric void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
240881ad6265SDimitry Andric   Value *Start = getStartValue()->getLiveInIRValue();
24095f757f3fSDimitry Andric   PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
24105f757f3fSDimitry Andric   EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
241181ad6265SDimitry Andric 
241281ad6265SDimitry Andric   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
241381ad6265SDimitry Andric   EntryPart->addIncoming(Start, VectorPH);
24145f757f3fSDimitry Andric   EntryPart->setDebugLoc(getDebugLoc());
241581ad6265SDimitry Andric   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
2416*0fca6ea1SDimitry Andric     State.set(this, EntryPart, Part, /*IsScalar*/ true);
241781ad6265SDimitry Andric }
241881ad6265SDimitry Andric 
241981ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
242081ad6265SDimitry Andric void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
242181ad6265SDimitry Andric                                    VPSlotTracker &SlotTracker) const {
242281ad6265SDimitry Andric   O << Indent << "EMIT ";
242381ad6265SDimitry Andric   printAsOperand(O, SlotTracker);
242481ad6265SDimitry Andric   O << " = CANONICAL-INDUCTION ";
24255f757f3fSDimitry Andric   printOperands(O, SlotTracker);
242681ad6265SDimitry Andric }
242781ad6265SDimitry Andric #endif
242881ad6265SDimitry Andric 
242906c3fb27SDimitry Andric bool VPCanonicalIVPHIRecipe::isCanonical(
2430*0fca6ea1SDimitry Andric     InductionDescriptor::InductionKind Kind, VPValue *Start,
2431*0fca6ea1SDimitry Andric     VPValue *Step) const {
2432*0fca6ea1SDimitry Andric   // Must be an integer induction.
2433*0fca6ea1SDimitry Andric   if (Kind != InductionDescriptor::IK_IntInduction)
2434bdd1243dSDimitry Andric     return false;
243506c3fb27SDimitry Andric   // Start must match the start value of this canonical induction.
243606c3fb27SDimitry Andric   if (Start != getStartValue())
2437bdd1243dSDimitry Andric     return false;
2438bdd1243dSDimitry Andric 
243906c3fb27SDimitry Andric   // If the step is defined by a recipe, it is not a ConstantInt.
244006c3fb27SDimitry Andric   if (Step->getDefiningRecipe())
244106c3fb27SDimitry Andric     return false;
244206c3fb27SDimitry Andric 
244306c3fb27SDimitry Andric   ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
244406c3fb27SDimitry Andric   return StepC && StepC->isOne();
2445bdd1243dSDimitry Andric }
2446bdd1243dSDimitry Andric 
2447*0fca6ea1SDimitry Andric bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
24486246ae0bSDimitry Andric   return IsScalarAfterVectorization &&
2449*0fca6ea1SDimitry Andric          (!IsScalable || vputils::onlyFirstLaneUsed(this));
245081ad6265SDimitry Andric }
245181ad6265SDimitry Andric 
245281ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
245381ad6265SDimitry Andric void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
245481ad6265SDimitry Andric                                           VPSlotTracker &SlotTracker) const {
245581ad6265SDimitry Andric   O << Indent << "EMIT ";
245681ad6265SDimitry Andric   printAsOperand(O, SlotTracker);
245781ad6265SDimitry Andric   O << " = WIDEN-POINTER-INDUCTION ";
245881ad6265SDimitry Andric   getStartValue()->printAsOperand(O, SlotTracker);
245981ad6265SDimitry Andric   O << ", " << *IndDesc.getStep();
246081ad6265SDimitry Andric }
246181ad6265SDimitry Andric #endif
246281ad6265SDimitry Andric 
246381ad6265SDimitry Andric void VPExpandSCEVRecipe::execute(VPTransformState &State) {
246481ad6265SDimitry Andric   assert(!State.Instance && "cannot be used in per-lane");
2465*0fca6ea1SDimitry Andric   const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
246681ad6265SDimitry Andric   SCEVExpander Exp(SE, DL, "induction");
246781ad6265SDimitry Andric 
246881ad6265SDimitry Andric   Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
246981ad6265SDimitry Andric                                  &*State.Builder.GetInsertPoint());
247006c3fb27SDimitry Andric   assert(!State.ExpandedSCEVs.contains(Expr) &&
247106c3fb27SDimitry Andric          "Same SCEV expanded multiple times");
247206c3fb27SDimitry Andric   State.ExpandedSCEVs[Expr] = Res;
247381ad6265SDimitry Andric   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
247406c3fb27SDimitry Andric     State.set(this, Res, {Part, 0});
247581ad6265SDimitry Andric }
247681ad6265SDimitry Andric 
247781ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
247881ad6265SDimitry Andric void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
247981ad6265SDimitry Andric                                VPSlotTracker &SlotTracker) const {
248081ad6265SDimitry Andric   O << Indent << "EMIT ";
248181ad6265SDimitry Andric   getVPSingleValue()->printAsOperand(O, SlotTracker);
248281ad6265SDimitry Andric   O << " = EXPAND SCEV " << *Expr;
248381ad6265SDimitry Andric }
248481ad6265SDimitry Andric #endif
248581ad6265SDimitry Andric 
248681ad6265SDimitry Andric void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
2487*0fca6ea1SDimitry Andric   Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
248881ad6265SDimitry Andric   Type *STy = CanonicalIV->getType();
248981ad6265SDimitry Andric   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
249081ad6265SDimitry Andric   ElementCount VF = State.VF;
249181ad6265SDimitry Andric   Value *VStart = VF.isScalar()
249281ad6265SDimitry Andric                       ? CanonicalIV
249381ad6265SDimitry Andric                       : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
249481ad6265SDimitry Andric   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
249581ad6265SDimitry Andric     Value *VStep = createStepForVF(Builder, STy, VF, Part);
249681ad6265SDimitry Andric     if (VF.isVector()) {
249781ad6265SDimitry Andric       VStep = Builder.CreateVectorSplat(VF, VStep);
249881ad6265SDimitry Andric       VStep =
249981ad6265SDimitry Andric           Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
250081ad6265SDimitry Andric     }
250181ad6265SDimitry Andric     Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
250281ad6265SDimitry Andric     State.set(this, CanonicalVectorIV, Part);
250381ad6265SDimitry Andric   }
250481ad6265SDimitry Andric }
250581ad6265SDimitry Andric 
250681ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
250781ad6265SDimitry Andric void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
250881ad6265SDimitry Andric                                      VPSlotTracker &SlotTracker) const {
250981ad6265SDimitry Andric   O << Indent << "EMIT ";
251081ad6265SDimitry Andric   printAsOperand(O, SlotTracker);
251181ad6265SDimitry Andric   O << " = WIDEN-CANONICAL-INDUCTION ";
251281ad6265SDimitry Andric   printOperands(O, SlotTracker);
251381ad6265SDimitry Andric }
251481ad6265SDimitry Andric #endif
251581ad6265SDimitry Andric 
251681ad6265SDimitry Andric void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
251781ad6265SDimitry Andric   auto &Builder = State.Builder;
251881ad6265SDimitry Andric   // Create a vector from the initial value.
251981ad6265SDimitry Andric   auto *VectorInit = getStartValue()->getLiveInIRValue();
252081ad6265SDimitry Andric 
252181ad6265SDimitry Andric   Type *VecTy = State.VF.isScalar()
252281ad6265SDimitry Andric                     ? VectorInit->getType()
252381ad6265SDimitry Andric                     : VectorType::get(VectorInit->getType(), State.VF);
252481ad6265SDimitry Andric 
252581ad6265SDimitry Andric   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
252681ad6265SDimitry Andric   if (State.VF.isVector()) {
252781ad6265SDimitry Andric     auto *IdxTy = Builder.getInt32Ty();
252881ad6265SDimitry Andric     auto *One = ConstantInt::get(IdxTy, 1);
252981ad6265SDimitry Andric     IRBuilder<>::InsertPointGuard Guard(Builder);
253081ad6265SDimitry Andric     Builder.SetInsertPoint(VectorPH->getTerminator());
253181ad6265SDimitry Andric     auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
253281ad6265SDimitry Andric     auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
253381ad6265SDimitry Andric     VectorInit = Builder.CreateInsertElement(
253481ad6265SDimitry Andric         PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
253581ad6265SDimitry Andric   }
253681ad6265SDimitry Andric 
253781ad6265SDimitry Andric   // Create a phi node for the new recurrence.
25385f757f3fSDimitry Andric   PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
25395f757f3fSDimitry Andric   EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
254081ad6265SDimitry Andric   EntryPart->addIncoming(VectorInit, VectorPH);
254181ad6265SDimitry Andric   State.set(this, EntryPart, 0);
254281ad6265SDimitry Andric }
254381ad6265SDimitry Andric 
254481ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
254581ad6265SDimitry Andric void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
254681ad6265SDimitry Andric                                             VPSlotTracker &SlotTracker) const {
254781ad6265SDimitry Andric   O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
254881ad6265SDimitry Andric   printAsOperand(O, SlotTracker);
254981ad6265SDimitry Andric   O << " = phi ";
255081ad6265SDimitry Andric   printOperands(O, SlotTracker);
255181ad6265SDimitry Andric }
255281ad6265SDimitry Andric #endif
255381ad6265SDimitry Andric 
255481ad6265SDimitry Andric void VPReductionPHIRecipe::execute(VPTransformState &State) {
255581ad6265SDimitry Andric   auto &Builder = State.Builder;
255681ad6265SDimitry Andric 
25577a6dacacSDimitry Andric   // Reductions do not have to start at zero. They can start with
25587a6dacacSDimitry Andric   // any loop invariant values.
25597a6dacacSDimitry Andric   VPValue *StartVPV = getStartValue();
25607a6dacacSDimitry Andric   Value *StartV = StartVPV->getLiveInIRValue();
25617a6dacacSDimitry Andric 
256281ad6265SDimitry Andric   // In order to support recurrences we need to be able to vectorize Phi nodes.
256381ad6265SDimitry Andric   // Phi nodes have cycles, so we need to vectorize them in two stages. This is
256481ad6265SDimitry Andric   // stage #1: We create a new vector PHI node with no incoming edges. We'll use
256581ad6265SDimitry Andric   // this value when we vectorize all of the instructions that use the PHI.
256681ad6265SDimitry Andric   bool ScalarPHI = State.VF.isScalar() || IsInLoop;
25677a6dacacSDimitry Andric   Type *VecTy = ScalarPHI ? StartV->getType()
25687a6dacacSDimitry Andric                           : VectorType::get(StartV->getType(), State.VF);
256981ad6265SDimitry Andric 
257081ad6265SDimitry Andric   BasicBlock *HeaderBB = State.CFG.PrevBB;
257181ad6265SDimitry Andric   assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
257281ad6265SDimitry Andric          "recipe must be in the vector loop header");
257381ad6265SDimitry Andric   unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
257481ad6265SDimitry Andric   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
25755f757f3fSDimitry Andric     Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
25765f757f3fSDimitry Andric     EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
2577*0fca6ea1SDimitry Andric     State.set(this, EntryPart, Part, IsInLoop);
257881ad6265SDimitry Andric   }
257981ad6265SDimitry Andric 
258081ad6265SDimitry Andric   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
258181ad6265SDimitry Andric 
258281ad6265SDimitry Andric   Value *Iden = nullptr;
258381ad6265SDimitry Andric   RecurKind RK = RdxDesc.getRecurrenceKind();
258481ad6265SDimitry Andric   if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
25855f757f3fSDimitry Andric       RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
25865f757f3fSDimitry Andric     // MinMax and AnyOf reductions have the start value as their identity.
258781ad6265SDimitry Andric     if (ScalarPHI) {
258881ad6265SDimitry Andric       Iden = StartV;
258981ad6265SDimitry Andric     } else {
259081ad6265SDimitry Andric       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
259181ad6265SDimitry Andric       Builder.SetInsertPoint(VectorPH->getTerminator());
259281ad6265SDimitry Andric       StartV = Iden =
259381ad6265SDimitry Andric           Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
259481ad6265SDimitry Andric     }
259581ad6265SDimitry Andric   } else {
259681ad6265SDimitry Andric     Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
259781ad6265SDimitry Andric                                          RdxDesc.getFastMathFlags());
259881ad6265SDimitry Andric 
259981ad6265SDimitry Andric     if (!ScalarPHI) {
260081ad6265SDimitry Andric       Iden = Builder.CreateVectorSplat(State.VF, Iden);
260181ad6265SDimitry Andric       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
260281ad6265SDimitry Andric       Builder.SetInsertPoint(VectorPH->getTerminator());
260381ad6265SDimitry Andric       Constant *Zero = Builder.getInt32(0);
260481ad6265SDimitry Andric       StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
260581ad6265SDimitry Andric     }
260681ad6265SDimitry Andric   }
260781ad6265SDimitry Andric 
260881ad6265SDimitry Andric   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
2609*0fca6ea1SDimitry Andric     Value *EntryPart = State.get(this, Part, IsInLoop);
261081ad6265SDimitry Andric     // Make sure to add the reduction start value only to the
261181ad6265SDimitry Andric     // first unroll part.
261281ad6265SDimitry Andric     Value *StartVal = (Part == 0) ? StartV : Iden;
261381ad6265SDimitry Andric     cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
261481ad6265SDimitry Andric   }
261581ad6265SDimitry Andric }
261681ad6265SDimitry Andric 
261781ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
261881ad6265SDimitry Andric void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
261981ad6265SDimitry Andric                                  VPSlotTracker &SlotTracker) const {
262081ad6265SDimitry Andric   O << Indent << "WIDEN-REDUCTION-PHI ";
262181ad6265SDimitry Andric 
262281ad6265SDimitry Andric   printAsOperand(O, SlotTracker);
262381ad6265SDimitry Andric   O << " = phi ";
262481ad6265SDimitry Andric   printOperands(O, SlotTracker);
262581ad6265SDimitry Andric }
262681ad6265SDimitry Andric #endif
262781ad6265SDimitry Andric 
262881ad6265SDimitry Andric void VPWidenPHIRecipe::execute(VPTransformState &State) {
262981ad6265SDimitry Andric   assert(EnableVPlanNativePath &&
263081ad6265SDimitry Andric          "Non-native vplans are not expected to have VPWidenPHIRecipes.");
263181ad6265SDimitry Andric 
26325f757f3fSDimitry Andric   Value *Op0 = State.get(getOperand(0), 0);
263381ad6265SDimitry Andric   Type *VecTy = Op0->getType();
263481ad6265SDimitry Andric   Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
263581ad6265SDimitry Andric   State.set(this, VecPhi, 0);
263681ad6265SDimitry Andric }
263781ad6265SDimitry Andric 
263881ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
263981ad6265SDimitry Andric void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
264081ad6265SDimitry Andric                              VPSlotTracker &SlotTracker) const {
264181ad6265SDimitry Andric   O << Indent << "WIDEN-PHI ";
264281ad6265SDimitry Andric 
264381ad6265SDimitry Andric   auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
264481ad6265SDimitry Andric   // Unless all incoming values are modeled in VPlan  print the original PHI
264581ad6265SDimitry Andric   // directly.
264681ad6265SDimitry Andric   // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
264781ad6265SDimitry Andric   // values as VPValues.
264881ad6265SDimitry Andric   if (getNumOperands() != OriginalPhi->getNumOperands()) {
264981ad6265SDimitry Andric     O << VPlanIngredient(OriginalPhi);
265081ad6265SDimitry Andric     return;
265181ad6265SDimitry Andric   }
265281ad6265SDimitry Andric 
265381ad6265SDimitry Andric   printAsOperand(O, SlotTracker);
265481ad6265SDimitry Andric   O << " = phi ";
265581ad6265SDimitry Andric   printOperands(O, SlotTracker);
265681ad6265SDimitry Andric }
265781ad6265SDimitry Andric #endif
2658753f127fSDimitry Andric 
2659753f127fSDimitry Andric // TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2660753f127fSDimitry Andric // remove VPActiveLaneMaskPHIRecipe.
2661753f127fSDimitry Andric void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
2662753f127fSDimitry Andric   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2663753f127fSDimitry Andric   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
2664753f127fSDimitry Andric     Value *StartMask = State.get(getOperand(0), Part);
2665753f127fSDimitry Andric     PHINode *EntryPart =
2666753f127fSDimitry Andric         State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
2667753f127fSDimitry Andric     EntryPart->addIncoming(StartMask, VectorPH);
26685f757f3fSDimitry Andric     EntryPart->setDebugLoc(getDebugLoc());
2669753f127fSDimitry Andric     State.set(this, EntryPart, Part);
2670753f127fSDimitry Andric   }
2671753f127fSDimitry Andric }
2672753f127fSDimitry Andric 
2673753f127fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2674753f127fSDimitry Andric void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
2675753f127fSDimitry Andric                                       VPSlotTracker &SlotTracker) const {
2676753f127fSDimitry Andric   O << Indent << "ACTIVE-LANE-MASK-PHI ";
2677753f127fSDimitry Andric 
2678753f127fSDimitry Andric   printAsOperand(O, SlotTracker);
2679753f127fSDimitry Andric   O << " = phi ";
2680753f127fSDimitry Andric   printOperands(O, SlotTracker);
2681753f127fSDimitry Andric }
2682753f127fSDimitry Andric #endif
2683*0fca6ea1SDimitry Andric 
2684*0fca6ea1SDimitry Andric void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) {
2685*0fca6ea1SDimitry Andric   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2686*0fca6ea1SDimitry Andric   assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization.");
2687*0fca6ea1SDimitry Andric   Value *Start = State.get(getOperand(0), VPIteration(0, 0));
2688*0fca6ea1SDimitry Andric   PHINode *EntryPart =
2689*0fca6ea1SDimitry Andric       State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
2690*0fca6ea1SDimitry Andric   EntryPart->addIncoming(Start, VectorPH);
2691*0fca6ea1SDimitry Andric   EntryPart->setDebugLoc(getDebugLoc());
2692*0fca6ea1SDimitry Andric   State.set(this, EntryPart, 0, /*IsScalar=*/true);
2693*0fca6ea1SDimitry Andric }
2694*0fca6ea1SDimitry Andric 
2695*0fca6ea1SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2696*0fca6ea1SDimitry Andric void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
2697*0fca6ea1SDimitry Andric                                   VPSlotTracker &SlotTracker) const {
2698*0fca6ea1SDimitry Andric   O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
2699*0fca6ea1SDimitry Andric 
2700*0fca6ea1SDimitry Andric   printAsOperand(O, SlotTracker);
2701*0fca6ea1SDimitry Andric   O << " = phi ";
2702*0fca6ea1SDimitry Andric   printOperands(O, SlotTracker);
2703*0fca6ea1SDimitry Andric }
2704*0fca6ea1SDimitry Andric #endif
2705