xref: /llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (revision 713482fccf82d33c5c4ddb24538958617e1eb957)
//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementations for different VPlan recipes.
///
//===----------------------------------------------------------------------===//
1303975b7fSFlorian Hahn 
1409a29fccSFlorian Hahn #include "LoopVectorizationPlanner.h"
1503975b7fSFlorian Hahn #include "VPlan.h"
16b0b88643SFlorian Hahn #include "VPlanAnalysis.h"
171d9b3222SFlorian Hahn #include "VPlanPatternMatch.h"
1871ede8d8SRamkumar Ramachandra #include "VPlanUtils.h"
1903975b7fSFlorian Hahn #include "llvm/ADT/STLExtras.h"
2003975b7fSFlorian Hahn #include "llvm/ADT/SmallVector.h"
2103975b7fSFlorian Hahn #include "llvm/ADT/Twine.h"
2203975b7fSFlorian Hahn #include "llvm/Analysis/IVDescriptors.h"
2303975b7fSFlorian Hahn #include "llvm/IR/BasicBlock.h"
2403975b7fSFlorian Hahn #include "llvm/IR/IRBuilder.h"
2503975b7fSFlorian Hahn #include "llvm/IR/Instruction.h"
2603975b7fSFlorian Hahn #include "llvm/IR/Instructions.h"
276f1a8c2dSGraham Hunter #include "llvm/IR/Intrinsics.h"
2803975b7fSFlorian Hahn #include "llvm/IR/Type.h"
2903975b7fSFlorian Hahn #include "llvm/IR/Value.h"
3000e40c9bSKolya Panchenko #include "llvm/IR/VectorBuilder.h"
3103975b7fSFlorian Hahn #include "llvm/Support/Casting.h"
3203975b7fSFlorian Hahn #include "llvm/Support/CommandLine.h"
3303975b7fSFlorian Hahn #include "llvm/Support/Debug.h"
3403975b7fSFlorian Hahn #include "llvm/Support/raw_ostream.h"
35225e3ec6SFlorian Hahn #include "llvm/Transforms/Utils/BasicBlockUtils.h"
36241fe837SFlorian Hahn #include "llvm/Transforms/Utils/LoopUtils.h"
3703975b7fSFlorian Hahn #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
3803975b7fSFlorian Hahn #include <cassert>
3903975b7fSFlorian Hahn 
4003975b7fSFlorian Hahn using namespace llvm;
4103975b7fSFlorian Hahn 
425d135041SFlorian Hahn using VectorParts = SmallVector<Value *, 2>;
435d135041SFlorian Hahn 
441e692113SFangrui Song namespace llvm {
4503975b7fSFlorian Hahn extern cl::opt<bool> EnableVPlanNativePath;
461e692113SFangrui Song }
47b841e2ecSFlorian Hahn extern cl::opt<unsigned> ForceTargetInstructionCost;
4803975b7fSFlorian Hahn 
4913ae2134SFlorian Hahn #define LV_NAME "loop-vectorize"
5013ae2134SFlorian Hahn #define DEBUG_TYPE LV_NAME
5113ae2134SFlorian Hahn 
5203975b7fSFlorian Hahn bool VPRecipeBase::mayWriteToMemory() const {
5303975b7fSFlorian Hahn   switch (getVPDefID()) {
5468ed1728SFlorian Hahn   case VPInstructionSC:
55f0d5104cSLuke Lau     return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
563d422a98SShih-Po Hung   case VPInterleaveSC:
573d422a98SShih-Po Hung     return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
58e2a72fa5SFlorian Hahn   case VPWidenStoreEVLSC:
59a9bafe91SFlorian Hahn   case VPWidenStoreSC:
60a9bafe91SFlorian Hahn     return true;
6103975b7fSFlorian Hahn   case VPReplicateSC:
6203975b7fSFlorian Hahn     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
6303975b7fSFlorian Hahn         ->mayWriteToMemory();
64e846778eSFlorian Hahn   case VPWidenCallSC:
65e846778eSFlorian Hahn     return !cast<VPWidenCallRecipe>(this)
66e846778eSFlorian Hahn                 ->getCalledScalarFunction()
67e846778eSFlorian Hahn                 ->onlyReadsMemory();
68a4819bd4SFlorian Hahn   case VPWidenIntrinsicSC:
696fbbe152SFlorian Hahn     return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
7003975b7fSFlorian Hahn   case VPBranchOnMaskSC:
713c5f0734SFlorian Hahn   case VPScalarIVStepsSC:
725368536cSFlorian Hahn   case VPPredInstPHISC:
7303975b7fSFlorian Hahn     return false;
7403975b7fSFlorian Hahn   case VPBlendSC:
754eb30cfbSMel Chen   case VPReductionEVLSC:
7603975b7fSFlorian Hahn   case VPReductionSC:
773860e29eSFlorian Hahn   case VPVectorPointerSC:
781b05e749SFlorian Hahn   case VPWidenCanonicalIVSC:
79e3afe0b8SFlorian Hahn   case VPWidenCastSC:
801b05e749SFlorian Hahn   case VPWidenGEPSC:
811b05e749SFlorian Hahn   case VPWidenIntOrFpInductionSC:
82e2a72fa5SFlorian Hahn   case VPWidenLoadEVLSC:
83a9bafe91SFlorian Hahn   case VPWidenLoadSC:
841b05e749SFlorian Hahn   case VPWidenPHISC:
851b05e749SFlorian Hahn   case VPWidenSC:
8600e40c9bSKolya Panchenko   case VPWidenEVLSC:
8703975b7fSFlorian Hahn   case VPWidenSelectSC: {
8803975b7fSFlorian Hahn     const Instruction *I =
8903975b7fSFlorian Hahn         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
9003975b7fSFlorian Hahn     (void)I;
9103975b7fSFlorian Hahn     assert((!I || !I->mayWriteToMemory()) &&
9203975b7fSFlorian Hahn            "underlying instruction may write to memory");
9303975b7fSFlorian Hahn     return false;
9403975b7fSFlorian Hahn   }
9503975b7fSFlorian Hahn   default:
9603975b7fSFlorian Hahn     return true;
9703975b7fSFlorian Hahn   }
9803975b7fSFlorian Hahn }
9903975b7fSFlorian Hahn 
10003975b7fSFlorian Hahn bool VPRecipeBase::mayReadFromMemory() const {
10103975b7fSFlorian Hahn   switch (getVPDefID()) {
102f0d5104cSLuke Lau   case VPInstructionSC:
103f0d5104cSLuke Lau     return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
104e2a72fa5SFlorian Hahn   case VPWidenLoadEVLSC:
105a9bafe91SFlorian Hahn   case VPWidenLoadSC:
106a9bafe91SFlorian Hahn     return true;
10703975b7fSFlorian Hahn   case VPReplicateSC:
10803975b7fSFlorian Hahn     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
10903975b7fSFlorian Hahn         ->mayReadFromMemory();
110e846778eSFlorian Hahn   case VPWidenCallSC:
111e846778eSFlorian Hahn     return !cast<VPWidenCallRecipe>(this)
112e846778eSFlorian Hahn                 ->getCalledScalarFunction()
113e846778eSFlorian Hahn                 ->onlyWritesMemory();
1146fbbe152SFlorian Hahn   case VPWidenIntrinsicSC:
1156fbbe152SFlorian Hahn     return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
11603975b7fSFlorian Hahn   case VPBranchOnMaskSC:
117cf2d436bSFlorian Hahn   case VPPredInstPHISC:
118a9bafe91SFlorian Hahn   case VPScalarIVStepsSC:
119e2a72fa5SFlorian Hahn   case VPWidenStoreEVLSC:
120a9bafe91SFlorian Hahn   case VPWidenStoreSC:
12103975b7fSFlorian Hahn     return false;
12203975b7fSFlorian Hahn   case VPBlendSC:
1234eb30cfbSMel Chen   case VPReductionEVLSC:
12403975b7fSFlorian Hahn   case VPReductionSC:
1253860e29eSFlorian Hahn   case VPVectorPointerSC:
1261b05e749SFlorian Hahn   case VPWidenCanonicalIVSC:
127e3afe0b8SFlorian Hahn   case VPWidenCastSC:
1281b05e749SFlorian Hahn   case VPWidenGEPSC:
1291b05e749SFlorian Hahn   case VPWidenIntOrFpInductionSC:
1301b05e749SFlorian Hahn   case VPWidenPHISC:
1311b05e749SFlorian Hahn   case VPWidenSC:
13200e40c9bSKolya Panchenko   case VPWidenEVLSC:
13303975b7fSFlorian Hahn   case VPWidenSelectSC: {
13403975b7fSFlorian Hahn     const Instruction *I =
13503975b7fSFlorian Hahn         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
13603975b7fSFlorian Hahn     (void)I;
13703975b7fSFlorian Hahn     assert((!I || !I->mayReadFromMemory()) &&
13803975b7fSFlorian Hahn            "underlying instruction may read from memory");
13903975b7fSFlorian Hahn     return false;
14003975b7fSFlorian Hahn   }
14103975b7fSFlorian Hahn   default:
14203975b7fSFlorian Hahn     return true;
14303975b7fSFlorian Hahn   }
14403975b7fSFlorian Hahn }
14503975b7fSFlorian Hahn 
14603975b7fSFlorian Hahn bool VPRecipeBase::mayHaveSideEffects() const {
14703975b7fSFlorian Hahn   switch (getVPDefID()) {
1480c5df7cdSFlorian Hahn   case VPDerivedIVSC:
14916e0620dSFlorian Hahn   case VPPredInstPHISC:
1500ab539fdSFlorian Hahn   case VPScalarCastSC:
151266ff98cSShih-Po Hung   case VPReverseVectorPointerSC:
15216e0620dSFlorian Hahn     return false;
15334d25924SFlorian Hahn   case VPInstructionSC:
15468ed1728SFlorian Hahn     return mayWriteToMemory();
155e846778eSFlorian Hahn   case VPWidenCallSC: {
156e846778eSFlorian Hahn     Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
157e846778eSFlorian Hahn     return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
158e846778eSFlorian Hahn   }
1596fbbe152SFlorian Hahn   case VPWidenIntrinsicSC:
1606fbbe152SFlorian Hahn     return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
16103975b7fSFlorian Hahn   case VPBlendSC:
1624eb30cfbSMel Chen   case VPReductionEVLSC:
16303975b7fSFlorian Hahn   case VPReductionSC:
1641b05e749SFlorian Hahn   case VPScalarIVStepsSC:
1653860e29eSFlorian Hahn   case VPVectorPointerSC:
1661b05e749SFlorian Hahn   case VPWidenCanonicalIVSC:
167e3afe0b8SFlorian Hahn   case VPWidenCastSC:
1681b05e749SFlorian Hahn   case VPWidenGEPSC:
1691b05e749SFlorian Hahn   case VPWidenIntOrFpInductionSC:
1701b05e749SFlorian Hahn   case VPWidenPHISC:
1711b05e749SFlorian Hahn   case VPWidenPointerInductionSC:
1721b05e749SFlorian Hahn   case VPWidenSC:
17300e40c9bSKolya Panchenko   case VPWidenEVLSC:
1741b05e749SFlorian Hahn   case VPWidenSelectSC: {
17503975b7fSFlorian Hahn     const Instruction *I =
17603975b7fSFlorian Hahn         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
17703975b7fSFlorian Hahn     (void)I;
17803975b7fSFlorian Hahn     assert((!I || !I->mayHaveSideEffects()) &&
17903975b7fSFlorian Hahn            "underlying instruction has side-effects");
18003975b7fSFlorian Hahn     return false;
18103975b7fSFlorian Hahn   }
1823d422a98SShih-Po Hung   case VPInterleaveSC:
1833d422a98SShih-Po Hung     return mayWriteToMemory();
184e2a72fa5SFlorian Hahn   case VPWidenLoadEVLSC:
185a9bafe91SFlorian Hahn   case VPWidenLoadSC:
186e2a72fa5SFlorian Hahn   case VPWidenStoreEVLSC:
187a9bafe91SFlorian Hahn   case VPWidenStoreSC:
188a9bafe91SFlorian Hahn     assert(
189a9bafe91SFlorian Hahn         cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
190a9bafe91SFlorian Hahn             mayWriteToMemory() &&
19132efff59SFlorian Hahn         "mayHaveSideffects result for ingredient differs from this "
19232efff59SFlorian Hahn         "implementation");
19332efff59SFlorian Hahn     return mayWriteToMemory();
19403975b7fSFlorian Hahn   case VPReplicateSC: {
19503975b7fSFlorian Hahn     auto *R = cast<VPReplicateRecipe>(this);
19603975b7fSFlorian Hahn     return R->getUnderlyingInstr()->mayHaveSideEffects();
19703975b7fSFlorian Hahn   }
19803975b7fSFlorian Hahn   default:
19903975b7fSFlorian Hahn     return true;
20003975b7fSFlorian Hahn   }
20103975b7fSFlorian Hahn }
20203975b7fSFlorian Hahn 
20303975b7fSFlorian Hahn void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
20403975b7fSFlorian Hahn   assert(!Parent && "Recipe already in some VPBasicBlock");
20503975b7fSFlorian Hahn   assert(InsertPos->getParent() &&
20603975b7fSFlorian Hahn          "Insertion position not in any VPBasicBlock");
2079277a323SFlorian Hahn   InsertPos->getParent()->insert(this, InsertPos->getIterator());
20803975b7fSFlorian Hahn }
20903975b7fSFlorian Hahn 
21003975b7fSFlorian Hahn void VPRecipeBase::insertBefore(VPBasicBlock &BB,
21103975b7fSFlorian Hahn                                 iplist<VPRecipeBase>::iterator I) {
21203975b7fSFlorian Hahn   assert(!Parent && "Recipe already in some VPBasicBlock");
21303975b7fSFlorian Hahn   assert(I == BB.end() || I->getParent() == &BB);
2149277a323SFlorian Hahn   BB.insert(this, I);
21503975b7fSFlorian Hahn }
21603975b7fSFlorian Hahn 
21703975b7fSFlorian Hahn void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
21803975b7fSFlorian Hahn   assert(!Parent && "Recipe already in some VPBasicBlock");
21903975b7fSFlorian Hahn   assert(InsertPos->getParent() &&
22003975b7fSFlorian Hahn          "Insertion position not in any VPBasicBlock");
2219277a323SFlorian Hahn   InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
22203975b7fSFlorian Hahn }
22303975b7fSFlorian Hahn 
22403975b7fSFlorian Hahn void VPRecipeBase::removeFromParent() {
22503975b7fSFlorian Hahn   assert(getParent() && "Recipe not in any VPBasicBlock");
22603975b7fSFlorian Hahn   getParent()->getRecipeList().remove(getIterator());
22703975b7fSFlorian Hahn   Parent = nullptr;
22803975b7fSFlorian Hahn }
22903975b7fSFlorian Hahn 
23003975b7fSFlorian Hahn iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
23103975b7fSFlorian Hahn   assert(getParent() && "Recipe not in any VPBasicBlock");
23203975b7fSFlorian Hahn   return getParent()->getRecipeList().erase(getIterator());
23303975b7fSFlorian Hahn }
23403975b7fSFlorian Hahn 
23503975b7fSFlorian Hahn void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
23603975b7fSFlorian Hahn   removeFromParent();
23703975b7fSFlorian Hahn   insertAfter(InsertPos);
23803975b7fSFlorian Hahn }
23903975b7fSFlorian Hahn 
24003975b7fSFlorian Hahn void VPRecipeBase::moveBefore(VPBasicBlock &BB,
24103975b7fSFlorian Hahn                               iplist<VPRecipeBase>::iterator I) {
24203975b7fSFlorian Hahn   removeFromParent();
24303975b7fSFlorian Hahn   insertBefore(BB, I);
24403975b7fSFlorian Hahn }
24503975b7fSFlorian Hahn 
246b841e2ecSFlorian Hahn InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
247fa3258ecSFlorian Hahn   // Get the underlying instruction for the recipe, if there is one. It is used
248fa3258ecSFlorian Hahn   // to
249fa3258ecSFlorian Hahn   //   * decide if cost computation should be skipped for this recipe,
250fa3258ecSFlorian Hahn   //   * apply forced target instruction cost.
251fa3258ecSFlorian Hahn   Instruction *UI = nullptr;
252fa3258ecSFlorian Hahn   if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
253fa3258ecSFlorian Hahn     UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
254fa3258ecSFlorian Hahn   else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
255fa3258ecSFlorian Hahn     UI = IG->getInsertPos();
256fa3258ecSFlorian Hahn   else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
257fa3258ecSFlorian Hahn     UI = &WidenMem->getIngredient();
258b841e2ecSFlorian Hahn 
259fa3258ecSFlorian Hahn   InstructionCost RecipeCost;
260fa3258ecSFlorian Hahn   if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
261fa3258ecSFlorian Hahn     RecipeCost = 0;
262fa3258ecSFlorian Hahn   } else {
263fa3258ecSFlorian Hahn     RecipeCost = computeCost(VF, Ctx);
264bb60dd39SFlorian Hahn     if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
265b841e2ecSFlorian Hahn         RecipeCost.isValid())
266b841e2ecSFlorian Hahn       RecipeCost = InstructionCost(ForceTargetInstructionCost);
267fa3258ecSFlorian Hahn   }
268b841e2ecSFlorian Hahn 
269b841e2ecSFlorian Hahn   LLVM_DEBUG({
270b841e2ecSFlorian Hahn     dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
271b841e2ecSFlorian Hahn     dump();
272b841e2ecSFlorian Hahn   });
273b841e2ecSFlorian Hahn   return RecipeCost;
274b841e2ecSFlorian Hahn }
275b841e2ecSFlorian Hahn 
276b841e2ecSFlorian Hahn InstructionCost VPRecipeBase::computeCost(ElementCount VF,
277b841e2ecSFlorian Hahn                                           VPCostContext &Ctx) const {
278fa3258ecSFlorian Hahn   llvm_unreachable("subclasses should implement computeCost");
279fa3258ecSFlorian Hahn }
280fa3258ecSFlorian Hahn 
281795e35a6SSam Tebbs InstructionCost
282795e35a6SSam Tebbs VPPartialReductionRecipe::computeCost(ElementCount VF,
283795e35a6SSam Tebbs                                       VPCostContext &Ctx) const {
284795e35a6SSam Tebbs   std::optional<unsigned> Opcode = std::nullopt;
285795e35a6SSam Tebbs   VPRecipeBase *BinOpR = getOperand(0)->getDefiningRecipe();
286795e35a6SSam Tebbs   if (auto *WidenR = dyn_cast<VPWidenRecipe>(BinOpR))
287795e35a6SSam Tebbs     Opcode = std::make_optional(WidenR->getOpcode());
288795e35a6SSam Tebbs 
289795e35a6SSam Tebbs   VPRecipeBase *ExtAR = BinOpR->getOperand(0)->getDefiningRecipe();
290795e35a6SSam Tebbs   VPRecipeBase *ExtBR = BinOpR->getOperand(1)->getDefiningRecipe();
291795e35a6SSam Tebbs 
292795e35a6SSam Tebbs   auto *PhiType = Ctx.Types.inferScalarType(getOperand(1));
293795e35a6SSam Tebbs   auto *InputTypeA = Ctx.Types.inferScalarType(ExtAR ? ExtAR->getOperand(0)
294795e35a6SSam Tebbs                                                      : BinOpR->getOperand(0));
295795e35a6SSam Tebbs   auto *InputTypeB = Ctx.Types.inferScalarType(ExtBR ? ExtBR->getOperand(0)
296795e35a6SSam Tebbs                                                      : BinOpR->getOperand(1));
297795e35a6SSam Tebbs 
298795e35a6SSam Tebbs   auto GetExtendKind = [](VPRecipeBase *R) {
299795e35a6SSam Tebbs     // The extend could come from outside the plan.
300795e35a6SSam Tebbs     if (!R)
301795e35a6SSam Tebbs       return TargetTransformInfo::PR_None;
302795e35a6SSam Tebbs     auto *WidenCastR = dyn_cast<VPWidenCastRecipe>(R);
303795e35a6SSam Tebbs     if (!WidenCastR)
304795e35a6SSam Tebbs       return TargetTransformInfo::PR_None;
305795e35a6SSam Tebbs     if (WidenCastR->getOpcode() == Instruction::CastOps::ZExt)
306795e35a6SSam Tebbs       return TargetTransformInfo::PR_ZeroExtend;
307795e35a6SSam Tebbs     if (WidenCastR->getOpcode() == Instruction::CastOps::SExt)
308795e35a6SSam Tebbs       return TargetTransformInfo::PR_SignExtend;
309795e35a6SSam Tebbs     return TargetTransformInfo::PR_None;
310795e35a6SSam Tebbs   };
311795e35a6SSam Tebbs 
312795e35a6SSam Tebbs   return Ctx.TTI.getPartialReductionCost(getOpcode(), InputTypeA, InputTypeB,
313795e35a6SSam Tebbs                                          PhiType, VF, GetExtendKind(ExtAR),
314795e35a6SSam Tebbs                                          GetExtendKind(ExtBR), Opcode);
315795e35a6SSam Tebbs }
316795e35a6SSam Tebbs 
317795e35a6SSam Tebbs void VPPartialReductionRecipe::execute(VPTransformState &State) {
318795e35a6SSam Tebbs   State.setDebugLocFrom(getDebugLoc());
319795e35a6SSam Tebbs   auto &Builder = State.Builder;
320795e35a6SSam Tebbs 
321795e35a6SSam Tebbs   assert(getOpcode() == Instruction::Add &&
322795e35a6SSam Tebbs          "Unhandled partial reduction opcode");
323795e35a6SSam Tebbs 
324795e35a6SSam Tebbs   Value *BinOpVal = State.get(getOperand(0));
325795e35a6SSam Tebbs   Value *PhiVal = State.get(getOperand(1));
326795e35a6SSam Tebbs   assert(PhiVal && BinOpVal && "Phi and Mul must be set");
327795e35a6SSam Tebbs 
328795e35a6SSam Tebbs   Type *RetTy = PhiVal->getType();
329795e35a6SSam Tebbs 
330795e35a6SSam Tebbs   CallInst *V = Builder.CreateIntrinsic(
331795e35a6SSam Tebbs       RetTy, Intrinsic::experimental_vector_partial_reduce_add,
332795e35a6SSam Tebbs       {PhiVal, BinOpVal}, nullptr, "partial.reduce");
333795e35a6SSam Tebbs 
334795e35a6SSam Tebbs   State.set(this, V);
335795e35a6SSam Tebbs }
336795e35a6SSam Tebbs 
337795e35a6SSam Tebbs #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
338795e35a6SSam Tebbs void VPPartialReductionRecipe::print(raw_ostream &O, const Twine &Indent,
339795e35a6SSam Tebbs                                      VPSlotTracker &SlotTracker) const {
340795e35a6SSam Tebbs   O << Indent << "PARTIAL-REDUCE ";
341795e35a6SSam Tebbs   printAsOperand(O, SlotTracker);
342795e35a6SSam Tebbs   O << " = " << Instruction::getOpcodeName(getOpcode()) << " ";
343795e35a6SSam Tebbs   printOperands(O, SlotTracker);
344795e35a6SSam Tebbs }
345795e35a6SSam Tebbs #endif
346795e35a6SSam Tebbs 
3470b17e9d2SFlorian Hahn FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
3480b17e9d2SFlorian Hahn   assert(OpType == OperationType::FPMathOp &&
3490b17e9d2SFlorian Hahn          "recipe doesn't have fast math flags");
3500b17e9d2SFlorian Hahn   FastMathFlags Res;
3510b17e9d2SFlorian Hahn   Res.setAllowReassoc(FMFs.AllowReassoc);
3520b17e9d2SFlorian Hahn   Res.setNoNaNs(FMFs.NoNaNs);
3530b17e9d2SFlorian Hahn   Res.setNoInfs(FMFs.NoInfs);
3540b17e9d2SFlorian Hahn   Res.setNoSignedZeros(FMFs.NoSignedZeros);
3550b17e9d2SFlorian Hahn   Res.setAllowReciprocal(FMFs.AllowReciprocal);
3560b17e9d2SFlorian Hahn   Res.setAllowContract(FMFs.AllowContract);
3570b17e9d2SFlorian Hahn   Res.setApproxFunc(FMFs.ApproxFunc);
3580b17e9d2SFlorian Hahn   return Res;
3590b17e9d2SFlorian Hahn }
3600b17e9d2SFlorian Hahn 
36181bbe193SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
36281bbe193SFlorian Hahn void VPSingleDefRecipe::dump() const { VPDef::dump(); }
36381bbe193SFlorian Hahn #endif
36481bbe193SFlorian Hahn 
3658ec40675SFlorian Hahn template <unsigned PartOpIdx>
3668ec40675SFlorian Hahn VPValue *
3678ec40675SFlorian Hahn VPUnrollPartAccessor<PartOpIdx>::getUnrollPartOperand(VPUser &U) const {
3688ec40675SFlorian Hahn   if (U.getNumOperands() == PartOpIdx + 1)
3698ec40675SFlorian Hahn     return U.getOperand(PartOpIdx);
3708ec40675SFlorian Hahn   return nullptr;
3718ec40675SFlorian Hahn }
3728ec40675SFlorian Hahn 
3738ec40675SFlorian Hahn template <unsigned PartOpIdx>
3748ec40675SFlorian Hahn unsigned VPUnrollPartAccessor<PartOpIdx>::getUnrollPart(VPUser &U) const {
3758ec40675SFlorian Hahn   if (auto *UnrollPartOp = getUnrollPartOperand(U))
3768ec40675SFlorian Hahn     return cast<ConstantInt>(UnrollPartOp->getLiveInIRValue())->getZExtValue();
3778ec40675SFlorian Hahn   return 0;
3788ec40675SFlorian Hahn }
3798ec40675SFlorian Hahn 
380fd661957SFlorian Hahn VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
381fd661957SFlorian Hahn                              VPValue *A, VPValue *B, DebugLoc DL,
382fd661957SFlorian Hahn                              const Twine &Name)
383fd661957SFlorian Hahn     : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
384165e24aaSFlorian Hahn                           Pred, DL),
385abdb61f5SFlorian Hahn       Opcode(Opcode), Name(Name.str()) {
386fd661957SFlorian Hahn   assert(Opcode == Instruction::ICmp &&
387fd661957SFlorian Hahn          "only ICmp predicates supported at the moment");
388fd661957SFlorian Hahn }
389fd661957SFlorian Hahn 
390698ae660SFlorian Hahn VPInstruction::VPInstruction(unsigned Opcode,
391698ae660SFlorian Hahn                              std::initializer_list<VPValue *> Operands,
392698ae660SFlorian Hahn                              FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
393165e24aaSFlorian Hahn     : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
394abdb61f5SFlorian Hahn       Opcode(Opcode), Name(Name.str()) {
395698ae660SFlorian Hahn   // Make sure the VPInstruction is a floating-point operation.
396698ae660SFlorian Hahn   assert(isFPMathOp() && "this op can't take fast-math flags");
397698ae660SFlorian Hahn }
398698ae660SFlorian Hahn 
39906bb8c9fSFlorian Hahn bool VPInstruction::doesGeneratePerAllLanes() const {
40006bb8c9fSFlorian Hahn   return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
40106bb8c9fSFlorian Hahn }
40206bb8c9fSFlorian Hahn 
40306bb8c9fSFlorian Hahn bool VPInstruction::canGenerateScalarForFirstLane() const {
40406bb8c9fSFlorian Hahn   if (Instruction::isBinaryOp(getOpcode()))
40506bb8c9fSFlorian Hahn     return true;
4069a5a8731SFlorian Hahn   if (isSingleScalar() || isVectorToScalar())
40707b33013SFlorian Hahn     return true;
40806bb8c9fSFlorian Hahn   switch (Opcode) {
40999d6c6d9SFlorian Hahn   case Instruction::ICmp:
410f148d579SAlexey Bataev   case Instruction::Select:
41106bb8c9fSFlorian Hahn   case VPInstruction::BranchOnCond:
41206bb8c9fSFlorian Hahn   case VPInstruction::BranchOnCount:
41306bb8c9fSFlorian Hahn   case VPInstruction::CalculateTripCountMinusVF:
41406bb8c9fSFlorian Hahn   case VPInstruction::CanonicalIVIncrementForPart:
41506bb8c9fSFlorian Hahn   case VPInstruction::PtrAdd:
416413a66f3SAlexey Bataev   case VPInstruction::ExplicitVectorLength:
4175fae408dSFlorian Hahn   case VPInstruction::AnyOf:
41806bb8c9fSFlorian Hahn     return true;
41906bb8c9fSFlorian Hahn   default:
42006bb8c9fSFlorian Hahn     return false;
42106bb8c9fSFlorian Hahn   }
42206bb8c9fSFlorian Hahn }
42306bb8c9fSFlorian Hahn 
42406bb8c9fSFlorian Hahn Value *VPInstruction::generatePerLane(VPTransformState &State,
425aae7ac66SFlorian Hahn                                       const VPLane &Lane) {
42603975b7fSFlorian Hahn   IRBuilderBase &Builder = State.Builder;
42706bb8c9fSFlorian Hahn 
42806bb8c9fSFlorian Hahn   assert(getOpcode() == VPInstruction::PtrAdd &&
42906bb8c9fSFlorian Hahn          "only PtrAdd opcodes are supported for now");
43006bb8c9fSFlorian Hahn   return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
43106bb8c9fSFlorian Hahn                               State.get(getOperand(1), Lane), Name);
43206bb8c9fSFlorian Hahn }
43306bb8c9fSFlorian Hahn 
43406c3a7d2SFlorian Hahn Value *VPInstruction::generate(VPTransformState &State) {
43506bb8c9fSFlorian Hahn   IRBuilderBase &Builder = State.Builder;
43603975b7fSFlorian Hahn 
43703975b7fSFlorian Hahn   if (Instruction::isBinaryOp(getOpcode())) {
438911055e3SFlorian Hahn     bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
43957f5d8f2SFlorian Hahn     Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
44057f5d8f2SFlorian Hahn     Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
441a5891fa4SFlorian Hahn     auto *Res =
442a5891fa4SFlorian Hahn         Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
443a5891fa4SFlorian Hahn     if (auto *I = dyn_cast<Instruction>(Res))
444a5891fa4SFlorian Hahn       setFlags(I);
445a5891fa4SFlorian Hahn     return Res;
44603975b7fSFlorian Hahn   }
44703975b7fSFlorian Hahn 
44803975b7fSFlorian Hahn   switch (getOpcode()) {
44903975b7fSFlorian Hahn   case VPInstruction::Not: {
45057f5d8f2SFlorian Hahn     Value *A = State.get(getOperand(0));
4512265bb06SFlorian Hahn     return Builder.CreateNot(A, Name);
45203975b7fSFlorian Hahn   }
453fd661957SFlorian Hahn   case Instruction::ICmp: {
45499d6c6d9SFlorian Hahn     bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
45557f5d8f2SFlorian Hahn     Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
45657f5d8f2SFlorian Hahn     Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
457fd661957SFlorian Hahn     return Builder.CreateCmp(getPredicate(), A, B, Name);
45803975b7fSFlorian Hahn   }
45903975b7fSFlorian Hahn   case Instruction::Select: {
460f148d579SAlexey Bataev     bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
461f148d579SAlexey Bataev     Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);
462f148d579SAlexey Bataev     Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);
463f148d579SAlexey Bataev     Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);
4642265bb06SFlorian Hahn     return Builder.CreateSelect(Cond, Op1, Op2, Name);
46503975b7fSFlorian Hahn   }
46603975b7fSFlorian Hahn   case VPInstruction::ActiveLaneMask: {
46703975b7fSFlorian Hahn     // Get first lane of vector induction variable.
468aae7ac66SFlorian Hahn     Value *VIVElem0 = State.get(getOperand(0), VPLane(0));
46903975b7fSFlorian Hahn     // Get the original loop tripcount.
470aae7ac66SFlorian Hahn     Value *ScalarTC = State.get(getOperand(1), VPLane(0));
47103975b7fSFlorian Hahn 
472012d2171SCameron McInally     // If this part of the active lane mask is scalar, generate the CMP directly
473012d2171SCameron McInally     // to avoid unnecessary extracts.
474012d2171SCameron McInally     if (State.VF.isScalar())
475012d2171SCameron McInally       return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
476012d2171SCameron McInally                                Name);
477012d2171SCameron McInally 
47803975b7fSFlorian Hahn     auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
47903975b7fSFlorian Hahn     auto *PredTy = VectorType::get(Int1Ty, State.VF);
4802265bb06SFlorian Hahn     return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
4812265bb06SFlorian Hahn                                    {PredTy, ScalarTC->getType()},
48202d6950dSDavid Sherwood                                    {VIVElem0, ScalarTC}, nullptr, Name);
48303975b7fSFlorian Hahn   }
48403975b7fSFlorian Hahn   case VPInstruction::FirstOrderRecurrenceSplice: {
48503975b7fSFlorian Hahn     // Generate code to combine the previous and current values in vector v3.
48603975b7fSFlorian Hahn     //
48703975b7fSFlorian Hahn     //   vector.ph:
48803975b7fSFlorian Hahn     //     v_init = vector(..., ..., ..., a[-1])
48903975b7fSFlorian Hahn     //     br vector.body
49003975b7fSFlorian Hahn     //
49103975b7fSFlorian Hahn     //   vector.body
49203975b7fSFlorian Hahn     //     i = phi [0, vector.ph], [i+4, vector.body]
49303975b7fSFlorian Hahn     //     v1 = phi [v_init, vector.ph], [v2, vector.body]
49403975b7fSFlorian Hahn     //     v2 = a[i, i+1, i+2, i+3];
49503975b7fSFlorian Hahn     //     v3 = vector(v1(3), v2(0, 1, 2))
49603975b7fSFlorian Hahn 
49757f5d8f2SFlorian Hahn     auto *V1 = State.get(getOperand(0));
49806c3a7d2SFlorian Hahn     if (!V1->getType()->isVectorTy())
49906c3a7d2SFlorian Hahn       return V1;
50057f5d8f2SFlorian Hahn     Value *V2 = State.get(getOperand(1));
50106c3a7d2SFlorian Hahn     return Builder.CreateVectorSplice(V1, V2, -1, Name);
50203975b7fSFlorian Hahn   }
503fe1b51ffSSander de Smalen   case VPInstruction::CalculateTripCountMinusVF: {
5048ec40675SFlorian Hahn     unsigned UF = getParent()->getPlan()->getUF();
505aae7ac66SFlorian Hahn     Value *ScalarTC = State.get(getOperand(0), VPLane(0));
5068ec40675SFlorian Hahn     Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);
507fe1b51ffSSander de Smalen     Value *Sub = Builder.CreateSub(ScalarTC, Step);
508fe1b51ffSSander de Smalen     Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
509fe1b51ffSSander de Smalen     Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
5102265bb06SFlorian Hahn     return Builder.CreateSelect(Cmp, Sub, Zero);
511fe1b51ffSSander de Smalen   }
512413a66f3SAlexey Bataev   case VPInstruction::ExplicitVectorLength: {
51360ed2361SAlexey Bataev     // TODO: Restructure this code with an explicit remainder loop, vsetvli can
51460ed2361SAlexey Bataev     // be outside of the main loop.
515aae7ac66SFlorian Hahn     Value *AVL = State.get(getOperand(0), /*IsScalar*/ true);
516413a66f3SAlexey Bataev     // Compute EVL
517413a66f3SAlexey Bataev     assert(AVL->getType()->isIntegerTy() &&
518413a66f3SAlexey Bataev            "Requested vector length should be an integer.");
519413a66f3SAlexey Bataev 
520413a66f3SAlexey Bataev     assert(State.VF.isScalable() && "Expected scalable vector factor.");
521413a66f3SAlexey Bataev     Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
522413a66f3SAlexey Bataev 
523413a66f3SAlexey Bataev     Value *EVL = State.Builder.CreateIntrinsic(
524413a66f3SAlexey Bataev         State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
525413a66f3SAlexey Bataev         {AVL, VFArg, State.Builder.getTrue()});
526413a66f3SAlexey Bataev     return EVL;
527413a66f3SAlexey Bataev   }
528af635a55SFlorian Hahn   case VPInstruction::CanonicalIVIncrementForPart: {
5298ec40675SFlorian Hahn     unsigned Part = getUnrollPart(*this);
530aae7ac66SFlorian Hahn     auto *IV = State.get(getOperand(0), VPLane(0));
5318ec40675SFlorian Hahn     assert(Part != 0 && "Must have a positive part");
5328ec40675SFlorian Hahn     // The canonical IV is incremented by the vectorization factor (num of
5338ec40675SFlorian Hahn     // SIMD elements) times the unroll part.
53403fee671SDavid Sherwood     Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
535b6d994deSFlorian Hahn     return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
536b6d994deSFlorian Hahn                              hasNoSignedWrap());
53703fee671SDavid Sherwood   }
53803975b7fSFlorian Hahn   case VPInstruction::BranchOnCond: {
539aae7ac66SFlorian Hahn     Value *Cond = State.get(getOperand(0), VPLane(0));
54003975b7fSFlorian Hahn     // Replace the temporary unreachable terminator with a new conditional
54103975b7fSFlorian Hahn     // branch, hooking it up to backward destination for exiting blocks now and
54203975b7fSFlorian Hahn     // to forward destination(s) later when they are created.
54303975b7fSFlorian Hahn     BranchInst *CondBr =
54403975b7fSFlorian Hahn         Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
54503975b7fSFlorian Hahn     CondBr->setSuccessor(0, nullptr);
54603975b7fSFlorian Hahn     Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
547ab9c2b1cSFlorian Hahn 
548ab9c2b1cSFlorian Hahn     if (!getParent()->isExiting())
549ab9c2b1cSFlorian Hahn       return CondBr;
550ab9c2b1cSFlorian Hahn 
551ab9c2b1cSFlorian Hahn     VPRegionBlock *ParentRegion = getParent()->getParent();
552ab9c2b1cSFlorian Hahn     VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
553ab9c2b1cSFlorian Hahn     CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
5542265bb06SFlorian Hahn     return CondBr;
55503975b7fSFlorian Hahn   }
55603975b7fSFlorian Hahn   case VPInstruction::BranchOnCount: {
55703975b7fSFlorian Hahn     // First create the compare.
55857f5d8f2SFlorian Hahn     Value *IV = State.get(getOperand(0), /*IsScalar*/ true);
55957f5d8f2SFlorian Hahn     Value *TC = State.get(getOperand(1), /*IsScalar*/ true);
56003975b7fSFlorian Hahn     Value *Cond = Builder.CreateICmpEQ(IV, TC);
56103975b7fSFlorian Hahn 
56203975b7fSFlorian Hahn     // Now create the branch.
56303975b7fSFlorian Hahn     auto *Plan = getParent()->getPlan();
56403975b7fSFlorian Hahn     VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
56503975b7fSFlorian Hahn     VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
56603975b7fSFlorian Hahn 
56703975b7fSFlorian Hahn     // Replace the temporary unreachable terminator with a new conditional
56803975b7fSFlorian Hahn     // branch, hooking it up to backward destination (the header) now and to the
56903975b7fSFlorian Hahn     // forward destination (the exit/middle block) later when it is created.
57003975b7fSFlorian Hahn     // Note that CreateCondBr expects a valid BB as first argument, so we need
57103975b7fSFlorian Hahn     // to set it to nullptr later.
57203975b7fSFlorian Hahn     BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
57303975b7fSFlorian Hahn                                               State.CFG.VPBB2IRBB[Header]);
57403975b7fSFlorian Hahn     CondBr->setSuccessor(0, nullptr);
57503975b7fSFlorian Hahn     Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
5762265bb06SFlorian Hahn     return CondBr;
57703975b7fSFlorian Hahn   }
578241fe837SFlorian Hahn   case VPInstruction::ComputeReductionResult: {
579241fe837SFlorian Hahn     // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
580241fe837SFlorian Hahn     // and will be removed by breaking up the recipe further.
581241fe837SFlorian Hahn     auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
582241fe837SFlorian Hahn     auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
583241fe837SFlorian Hahn     // Get its reduction variable descriptor.
584241fe837SFlorian Hahn     const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
585241fe837SFlorian Hahn 
586241fe837SFlorian Hahn     RecurKind RK = RdxDesc.getRecurrenceKind();
587241fe837SFlorian Hahn 
588241fe837SFlorian Hahn     Type *PhiTy = OrigPhi->getType();
5898ec40675SFlorian Hahn     // The recipe's operands are the reduction phi, followed by one operand for
5908ec40675SFlorian Hahn     // each part of the reduction.
5918ec40675SFlorian Hahn     unsigned UF = getNumOperands() - 1;
5928ec40675SFlorian Hahn     VectorParts RdxParts(UF);
5938ec40675SFlorian Hahn     for (unsigned Part = 0; Part < UF; ++Part)
59457f5d8f2SFlorian Hahn       RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());
595241fe837SFlorian Hahn 
596241fe837SFlorian Hahn     // If the vector reduction can be performed in a smaller type, we truncate
597241fe837SFlorian Hahn     // then extend the loop exit value to enable InstCombine to evaluate the
598241fe837SFlorian Hahn     // entire expression in the smaller type.
599241fe837SFlorian Hahn     // TODO: Handle this in truncateToMinBW.
600241fe837SFlorian Hahn     if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
601241fe837SFlorian Hahn       Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
6028ec40675SFlorian Hahn       for (unsigned Part = 0; Part < UF; ++Part)
603241fe837SFlorian Hahn         RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
604241fe837SFlorian Hahn     }
605241fe837SFlorian Hahn     // Reduce all of the unrolled parts into a single vector.
606241fe837SFlorian Hahn     Value *ReducedPartRdx = RdxParts[0];
6072a0ee090SRamkumar Ramachandra     unsigned Op = RdxDesc.getOpcode();
608bccb7ed8SFlorian Hahn     if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
609bccb7ed8SFlorian Hahn       Op = Instruction::Or;
610241fe837SFlorian Hahn 
611241fe837SFlorian Hahn     if (PhiR->isOrdered()) {
6128ec40675SFlorian Hahn       ReducedPartRdx = RdxParts[UF - 1];
613241fe837SFlorian Hahn     } else {
614241fe837SFlorian Hahn       // Floating-point operations should have some FMF to enable the reduction.
615241fe837SFlorian Hahn       IRBuilderBase::FastMathFlagGuard FMFG(Builder);
616241fe837SFlorian Hahn       Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
6178ec40675SFlorian Hahn       for (unsigned Part = 1; Part < UF; ++Part) {
618241fe837SFlorian Hahn         Value *RdxPart = RdxParts[Part];
619241fe837SFlorian Hahn         if (Op != Instruction::ICmp && Op != Instruction::FCmp)
620241fe837SFlorian Hahn           ReducedPartRdx = Builder.CreateBinOp(
621241fe837SFlorian Hahn               (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
622b3cba9beSMel Chen         else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
623b3cba9beSMel Chen           ReducedPartRdx =
624b3cba9beSMel Chen               createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart);
625bccb7ed8SFlorian Hahn         else
626241fe837SFlorian Hahn           ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
627241fe837SFlorian Hahn       }
628241fe837SFlorian Hahn     }
629241fe837SFlorian Hahn 
630241fe837SFlorian Hahn     // Create the reduction after the loop. Note that inloop reductions create
631241fe837SFlorian Hahn     // the target reduction in the loop using a Reduction recipe.
632bccb7ed8SFlorian Hahn     if ((State.VF.isVector() ||
633b3cba9beSMel Chen          RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
634b3cba9beSMel Chen          RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) &&
635bccb7ed8SFlorian Hahn         !PhiR->isInLoop()) {
636241fe837SFlorian Hahn       ReducedPartRdx =
6373e8840baSPhilip Reames           createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
638241fe837SFlorian Hahn       // If the reduction can be performed in a smaller type, we need to extend
639241fe837SFlorian Hahn       // the reduction to the wider type before we branch to the original loop.
640241fe837SFlorian Hahn       if (PhiTy != RdxDesc.getRecurrenceType())
641241fe837SFlorian Hahn         ReducedPartRdx = RdxDesc.isSigned()
642241fe837SFlorian Hahn                              ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
643241fe837SFlorian Hahn                              : Builder.CreateZExt(ReducedPartRdx, PhiTy);
644241fe837SFlorian Hahn     }
645241fe837SFlorian Hahn 
646241fe837SFlorian Hahn     return ReducedPartRdx;
647241fe837SFlorian Hahn   }
64807b33013SFlorian Hahn   case VPInstruction::ExtractFromEnd: {
64907b33013SFlorian Hahn     auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
65007b33013SFlorian Hahn     unsigned Offset = CI->getZExtValue();
65107b33013SFlorian Hahn     assert(Offset > 0 && "Offset from end must be positive");
65207b33013SFlorian Hahn     Value *Res;
65307b33013SFlorian Hahn     if (State.VF.isVector()) {
65407b33013SFlorian Hahn       assert(Offset <= State.VF.getKnownMinValue() &&
65507b33013SFlorian Hahn              "invalid offset to extract from");
65607b33013SFlorian Hahn       // Extract lane VF - Offset from the operand.
657aae7ac66SFlorian Hahn       Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));
65807b33013SFlorian Hahn     } else {
65906c3a7d2SFlorian Hahn       assert(Offset <= 1 && "invalid offset to extract from");
66057f5d8f2SFlorian Hahn       Res = State.get(getOperand(0));
66107b33013SFlorian Hahn     }
66240a72f8cSFlorian Hahn     if (isa<ExtractElementInst>(Res))
66307b33013SFlorian Hahn       Res->setName(Name);
66407b33013SFlorian Hahn     return Res;
66507b33013SFlorian Hahn   }
666632317e9SFlorian Hahn   case VPInstruction::LogicalAnd: {
66757f5d8f2SFlorian Hahn     Value *A = State.get(getOperand(0));
66857f5d8f2SFlorian Hahn     Value *B = State.get(getOperand(1));
669632317e9SFlorian Hahn     return Builder.CreateLogicalAnd(A, B, Name);
670632317e9SFlorian Hahn   }
67106bb8c9fSFlorian Hahn   case VPInstruction::PtrAdd: {
67206bb8c9fSFlorian Hahn     assert(vputils::onlyFirstLaneUsed(this) &&
67306bb8c9fSFlorian Hahn            "can only generate first lane for PtrAdd");
67401cbbc52SFlorian Hahn     Value *Ptr = State.get(getOperand(0), VPLane(0));
67501cbbc52SFlorian Hahn     Value *Addend = State.get(getOperand(1), VPLane(0));
67611571874SNikita Popov     return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
67706bb8c9fSFlorian Hahn   }
6789a5a8731SFlorian Hahn   case VPInstruction::ResumePhi: {
6799a5a8731SFlorian Hahn     Value *IncomingFromVPlanPred =
68057f5d8f2SFlorian Hahn         State.get(getOperand(0), /* IsScalar */ true);
6819a5a8731SFlorian Hahn     Value *IncomingFromOtherPreds =
68257f5d8f2SFlorian Hahn         State.get(getOperand(1), /* IsScalar */ true);
6839a5a8731SFlorian Hahn     auto *NewPhi =
684bb86c5ddSFlorian Hahn         Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name);
6859a5a8731SFlorian Hahn     BasicBlock *VPlanPred =
6869a5a8731SFlorian Hahn         State.CFG
6876c8f41d3SFlorian Hahn             .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getPredecessors()[0])];
6889a5a8731SFlorian Hahn     NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
6894ad0fdd1SFlorian Hahn     for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
6906c8f41d3SFlorian Hahn       if (OtherPred == VPlanPred)
6916c8f41d3SFlorian Hahn         continue;
6929a5a8731SFlorian Hahn       NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
6939a5a8731SFlorian Hahn     }
6949a5a8731SFlorian Hahn     return NewPhi;
6959a5a8731SFlorian Hahn   }
6965fae408dSFlorian Hahn   case VPInstruction::AnyOf: {
6975fae408dSFlorian Hahn     Value *A = State.get(getOperand(0));
6985fae408dSFlorian Hahn     return Builder.CreateOrReduce(A);
6995fae408dSFlorian Hahn   }
7009a5a8731SFlorian Hahn 
70103975b7fSFlorian Hahn   default:
70203975b7fSFlorian Hahn     llvm_unreachable("Unsupported opcode for instruction");
70303975b7fSFlorian Hahn   }
70403975b7fSFlorian Hahn }
70503975b7fSFlorian Hahn 
70607b33013SFlorian Hahn bool VPInstruction::isVectorToScalar() const {
70707b33013SFlorian Hahn   return getOpcode() == VPInstruction::ExtractFromEnd ||
7085fae408dSFlorian Hahn          getOpcode() == VPInstruction::ComputeReductionResult ||
7095fae408dSFlorian Hahn          getOpcode() == VPInstruction::AnyOf;
71007b33013SFlorian Hahn }
71107b33013SFlorian Hahn 
7129a5a8731SFlorian Hahn bool VPInstruction::isSingleScalar() const {
7139a5a8731SFlorian Hahn   return getOpcode() == VPInstruction::ResumePhi;
7149a5a8731SFlorian Hahn }
7159a5a8731SFlorian Hahn 
716698ae660SFlorian Hahn #if !defined(NDEBUG)
717698ae660SFlorian Hahn bool VPInstruction::isFPMathOp() const {
718698ae660SFlorian Hahn   // Inspired by FPMathOperator::classof. Notable differences are that we don't
719698ae660SFlorian Hahn   // support Call, PHI and Select opcodes here yet.
720698ae660SFlorian Hahn   return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
721698ae660SFlorian Hahn          Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
722698ae660SFlorian Hahn          Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
723f23246a0SFlorian Hahn          Opcode == Instruction::FCmp || Opcode == Instruction::Select;
724698ae660SFlorian Hahn }
725698ae660SFlorian Hahn #endif
726698ae660SFlorian Hahn 
72703975b7fSFlorian Hahn void VPInstruction::execute(VPTransformState &State) {
728aae7ac66SFlorian Hahn   assert(!State.Lane && "VPInstruction executing an Lane");
72903975b7fSFlorian Hahn   IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
730f23246a0SFlorian Hahn   assert((hasFastMathFlags() == isFPMathOp() ||
731f23246a0SFlorian Hahn           getOpcode() == Instruction::Select) &&
732698ae660SFlorian Hahn          "Recipe not a FPMathOp but has fast-math flags?");
733698ae660SFlorian Hahn   if (hasFastMathFlags())
734698ae660SFlorian Hahn     State.Builder.setFastMathFlags(getFastMathFlags());
73516da9d53SFlorian Hahn   State.setDebugLocFrom(getDebugLoc());
7369a5a8731SFlorian Hahn   bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
7379a5a8731SFlorian Hahn                                    (vputils::onlyFirstLaneUsed(this) ||
7389a5a8731SFlorian Hahn                                     isVectorToScalar() || isSingleScalar());
73906bb8c9fSFlorian Hahn   bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
74006bb8c9fSFlorian Hahn   if (GeneratesPerAllLanes) {
74106bb8c9fSFlorian Hahn     for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
74206bb8c9fSFlorian Hahn          Lane != NumLanes; ++Lane) {
743aae7ac66SFlorian Hahn       Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
74406bb8c9fSFlorian Hahn       assert(GeneratedValue && "generatePerLane must produce a value");
745aae7ac66SFlorian Hahn       State.set(this, GeneratedValue, VPLane(Lane));
74606bb8c9fSFlorian Hahn     }
74706c3a7d2SFlorian Hahn     return;
74806bb8c9fSFlorian Hahn   }
74906bb8c9fSFlorian Hahn 
75006c3a7d2SFlorian Hahn   Value *GeneratedValue = generate(State);
7512265bb06SFlorian Hahn   if (!hasResult())
75206c3a7d2SFlorian Hahn     return;
75306c3a7d2SFlorian Hahn   assert(GeneratedValue && "generate must produce a value");
75406c3a7d2SFlorian Hahn   assert(
75506c3a7d2SFlorian Hahn       (GeneratedValue->getType()->isVectorTy() == !GeneratesPerFirstLaneOnly ||
75606bb8c9fSFlorian Hahn        State.VF.isScalar()) &&
75706bb8c9fSFlorian Hahn       "scalar value but not only first lane defined");
75857f5d8f2SFlorian Hahn   State.set(this, GeneratedValue,
75906bb8c9fSFlorian Hahn             /*IsScalar*/ GeneratesPerFirstLaneOnly);
7602265bb06SFlorian Hahn }
761911055e3SFlorian Hahn 
762f0d5104cSLuke Lau bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
763f0d5104cSLuke Lau   if (Instruction::isBinaryOp(getOpcode()))
764f0d5104cSLuke Lau     return false;
765f0d5104cSLuke Lau   switch (getOpcode()) {
766f0d5104cSLuke Lau   case Instruction::ICmp:
767f0d5104cSLuke Lau   case Instruction::Select:
768f0d5104cSLuke Lau   case VPInstruction::AnyOf:
769f0d5104cSLuke Lau   case VPInstruction::CalculateTripCountMinusVF:
770f0d5104cSLuke Lau   case VPInstruction::CanonicalIVIncrementForPart:
771f0d5104cSLuke Lau   case VPInstruction::ExtractFromEnd:
772f0d5104cSLuke Lau   case VPInstruction::FirstOrderRecurrenceSplice:
773f0d5104cSLuke Lau   case VPInstruction::LogicalAnd:
774f0d5104cSLuke Lau   case VPInstruction::Not:
775f0d5104cSLuke Lau   case VPInstruction::PtrAdd:
776f0d5104cSLuke Lau     return false;
777f0d5104cSLuke Lau   default:
778f0d5104cSLuke Lau     return true;
779f0d5104cSLuke Lau   }
780f0d5104cSLuke Lau }
781f0d5104cSLuke Lau 
78247abbf4fSFlorian Hahn bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
78347abbf4fSFlorian Hahn   assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
78447abbf4fSFlorian Hahn   if (Instruction::isBinaryOp(getOpcode()))
78547abbf4fSFlorian Hahn     return vputils::onlyFirstLaneUsed(this);
78647abbf4fSFlorian Hahn 
78747abbf4fSFlorian Hahn   switch (getOpcode()) {
78847abbf4fSFlorian Hahn   default:
78947abbf4fSFlorian Hahn     return false;
79047abbf4fSFlorian Hahn   case Instruction::ICmp:
791f148d579SAlexey Bataev   case Instruction::Select:
7925fae408dSFlorian Hahn   case Instruction::Or:
79306bb8c9fSFlorian Hahn   case VPInstruction::PtrAdd:
79447abbf4fSFlorian Hahn     // TODO: Cover additional opcodes.
79547abbf4fSFlorian Hahn     return vputils::onlyFirstLaneUsed(this);
79647abbf4fSFlorian Hahn   case VPInstruction::ActiveLaneMask:
797413a66f3SAlexey Bataev   case VPInstruction::ExplicitVectorLength:
79847abbf4fSFlorian Hahn   case VPInstruction::CalculateTripCountMinusVF:
79947abbf4fSFlorian Hahn   case VPInstruction::CanonicalIVIncrementForPart:
80047abbf4fSFlorian Hahn   case VPInstruction::BranchOnCount:
80199d6c6d9SFlorian Hahn   case VPInstruction::BranchOnCond:
8029a5a8731SFlorian Hahn   case VPInstruction::ResumePhi:
803911055e3SFlorian Hahn     return true;
80447abbf4fSFlorian Hahn   };
80547abbf4fSFlorian Hahn   llvm_unreachable("switch should return");
80647abbf4fSFlorian Hahn }
80703975b7fSFlorian Hahn 
8082f4ebf85SFlorian Hahn bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const {
8092f4ebf85SFlorian Hahn   assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
8102f4ebf85SFlorian Hahn   if (Instruction::isBinaryOp(getOpcode()))
8112f4ebf85SFlorian Hahn     return vputils::onlyFirstPartUsed(this);
8122f4ebf85SFlorian Hahn 
8132f4ebf85SFlorian Hahn   switch (getOpcode()) {
8142f4ebf85SFlorian Hahn   default:
8152f4ebf85SFlorian Hahn     return false;
8162f4ebf85SFlorian Hahn   case Instruction::ICmp:
8172f4ebf85SFlorian Hahn   case Instruction::Select:
8182f4ebf85SFlorian Hahn     return vputils::onlyFirstPartUsed(this);
8192f4ebf85SFlorian Hahn   case VPInstruction::BranchOnCount:
8202f4ebf85SFlorian Hahn   case VPInstruction::BranchOnCond:
8212f4ebf85SFlorian Hahn   case VPInstruction::CanonicalIVIncrementForPart:
8222f4ebf85SFlorian Hahn     return true;
8232f4ebf85SFlorian Hahn   };
8242f4ebf85SFlorian Hahn   llvm_unreachable("switch should return");
8252f4ebf85SFlorian Hahn }
8262f4ebf85SFlorian Hahn 
82703975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
82803975b7fSFlorian Hahn void VPInstruction::dump() const {
82903975b7fSFlorian Hahn   VPSlotTracker SlotTracker(getParent()->getPlan());
83003975b7fSFlorian Hahn   print(dbgs(), "", SlotTracker);
83103975b7fSFlorian Hahn }
83203975b7fSFlorian Hahn 
83303975b7fSFlorian Hahn void VPInstruction::print(raw_ostream &O, const Twine &Indent,
83403975b7fSFlorian Hahn                           VPSlotTracker &SlotTracker) const {
83503975b7fSFlorian Hahn   O << Indent << "EMIT ";
83603975b7fSFlorian Hahn 
83703975b7fSFlorian Hahn   if (hasResult()) {
83803975b7fSFlorian Hahn     printAsOperand(O, SlotTracker);
83903975b7fSFlorian Hahn     O << " = ";
84003975b7fSFlorian Hahn   }
84103975b7fSFlorian Hahn 
84203975b7fSFlorian Hahn   switch (getOpcode()) {
84303975b7fSFlorian Hahn   case VPInstruction::Not:
84403975b7fSFlorian Hahn     O << "not";
84503975b7fSFlorian Hahn     break;
84603975b7fSFlorian Hahn   case VPInstruction::SLPLoad:
84703975b7fSFlorian Hahn     O << "combined load";
84803975b7fSFlorian Hahn     break;
84903975b7fSFlorian Hahn   case VPInstruction::SLPStore:
85003975b7fSFlorian Hahn     O << "combined store";
85103975b7fSFlorian Hahn     break;
85203975b7fSFlorian Hahn   case VPInstruction::ActiveLaneMask:
85303975b7fSFlorian Hahn     O << "active lane mask";
85403975b7fSFlorian Hahn     break;
8559a5a8731SFlorian Hahn   case VPInstruction::ResumePhi:
8569a5a8731SFlorian Hahn     O << "resume-phi";
8579a5a8731SFlorian Hahn     break;
858413a66f3SAlexey Bataev   case VPInstruction::ExplicitVectorLength:
859413a66f3SAlexey Bataev     O << "EXPLICIT-VECTOR-LENGTH";
860413a66f3SAlexey Bataev     break;
86103975b7fSFlorian Hahn   case VPInstruction::FirstOrderRecurrenceSplice:
86203975b7fSFlorian Hahn     O << "first-order splice";
86303975b7fSFlorian Hahn     break;
86403975b7fSFlorian Hahn   case VPInstruction::BranchOnCond:
86503975b7fSFlorian Hahn     O << "branch-on-cond";
86603975b7fSFlorian Hahn     break;
867fe1b51ffSSander de Smalen   case VPInstruction::CalculateTripCountMinusVF:
868fe1b51ffSSander de Smalen     O << "TC > VF ? TC - VF : 0";
869fe1b51ffSSander de Smalen     break;
87003fee671SDavid Sherwood   case VPInstruction::CanonicalIVIncrementForPart:
87103fee671SDavid Sherwood     O << "VF * Part +";
87203fee671SDavid Sherwood     break;
87303975b7fSFlorian Hahn   case VPInstruction::BranchOnCount:
87403975b7fSFlorian Hahn     O << "branch-on-count";
87503975b7fSFlorian Hahn     break;
87607b33013SFlorian Hahn   case VPInstruction::ExtractFromEnd:
87707b33013SFlorian Hahn     O << "extract-from-end";
87807b33013SFlorian Hahn     break;
879241fe837SFlorian Hahn   case VPInstruction::ComputeReductionResult:
880241fe837SFlorian Hahn     O << "compute-reduction-result";
881241fe837SFlorian Hahn     break;
882632317e9SFlorian Hahn   case VPInstruction::LogicalAnd:
883632317e9SFlorian Hahn     O << "logical-and";
884632317e9SFlorian Hahn     break;
88506bb8c9fSFlorian Hahn   case VPInstruction::PtrAdd:
88606bb8c9fSFlorian Hahn     O << "ptradd";
88706bb8c9fSFlorian Hahn     break;
8885fae408dSFlorian Hahn   case VPInstruction::AnyOf:
8895fae408dSFlorian Hahn     O << "any-of";
8905fae408dSFlorian Hahn     break;
89103975b7fSFlorian Hahn   default:
89203975b7fSFlorian Hahn     O << Instruction::getOpcodeName(getOpcode());
89303975b7fSFlorian Hahn   }
89403975b7fSFlorian Hahn 
895af635a55SFlorian Hahn   printFlags(O);
89693c5bae0SFlorian Hahn   printOperands(O, SlotTracker);
89703975b7fSFlorian Hahn 
898165e24aaSFlorian Hahn   if (auto DL = getDebugLoc()) {
89903975b7fSFlorian Hahn     O << ", !dbg ";
90003975b7fSFlorian Hahn     DL.print(O);
90103975b7fSFlorian Hahn   }
90203975b7fSFlorian Hahn }
90303975b7fSFlorian Hahn #endif
90403975b7fSFlorian Hahn 
905f0c5caa8SFlorian Hahn void VPIRInstruction::execute(VPTransformState &State) {
906f0c5caa8SFlorian Hahn   assert((isa<PHINode>(&I) || getNumOperands() == 0) &&
907f0c5caa8SFlorian Hahn          "Only PHINodes can have extra operands");
9085fae408dSFlorian Hahn   for (const auto &[Idx, Op] : enumerate(operands())) {
9095fae408dSFlorian Hahn     VPValue *ExitValue = Op;
910f0c5caa8SFlorian Hahn     auto Lane = vputils::isUniformAfterVectorization(ExitValue)
911f0c5caa8SFlorian Hahn                     ? VPLane::getFirstLane()
912f0c5caa8SFlorian Hahn                     : VPLane::getLastLaneForVF(State.VF);
9135fae408dSFlorian Hahn     VPBlockBase *Pred = getParent()->getPredecessors()[Idx];
9145fae408dSFlorian Hahn     auto *PredVPBB = Pred->getExitingBasicBlock();
915f0c5caa8SFlorian Hahn     BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
916f0c5caa8SFlorian Hahn     // Set insertion point in PredBB in case an extract needs to be generated.
917f0c5caa8SFlorian Hahn     // TODO: Model extracts explicitly.
918f0c5caa8SFlorian Hahn     State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
919aae7ac66SFlorian Hahn     Value *V = State.get(ExitValue, VPLane(Lane));
920f0c5caa8SFlorian Hahn     auto *Phi = cast<PHINode>(&I);
921b021464dSFlorian Hahn     // If there is no existing block for PredBB in the phi, add a new incoming
922b021464dSFlorian Hahn     // value. Otherwise update the existing incoming value for PredBB.
923b021464dSFlorian Hahn     if (Phi->getBasicBlockIndex(PredBB) == -1)
924f0c5caa8SFlorian Hahn       Phi->addIncoming(V, PredBB);
925b021464dSFlorian Hahn     else
926b021464dSFlorian Hahn       Phi->setIncomingValueForBlock(PredBB, V);
927f0c5caa8SFlorian Hahn   }
928f0c5caa8SFlorian Hahn 
929f0c5caa8SFlorian Hahn   // Advance the insert point after the wrapped IR instruction. This allows
930f0c5caa8SFlorian Hahn   // interleaving VPIRInstructions and other recipes.
931f0c5caa8SFlorian Hahn   State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
932f0c5caa8SFlorian Hahn }
933f0c5caa8SFlorian Hahn 
934fa3258ecSFlorian Hahn InstructionCost VPIRInstruction::computeCost(ElementCount VF,
935fa3258ecSFlorian Hahn                                              VPCostContext &Ctx) const {
936fa3258ecSFlorian Hahn   // The recipe wraps an existing IR instruction on the border of VPlan's scope,
937fa3258ecSFlorian Hahn   // hence it does not contribute to the cost-modeling for the VPlan.
938fa3258ecSFlorian Hahn   return 0;
939fa3258ecSFlorian Hahn }
940fa3258ecSFlorian Hahn 
94109a29fccSFlorian Hahn void VPIRInstruction::extractLastLaneOfOperand(VPBuilder &Builder) {
94209a29fccSFlorian Hahn   assert(isa<PHINode>(getInstruction()) &&
94309a29fccSFlorian Hahn          "can only add exiting operands to phi nodes");
94409a29fccSFlorian Hahn   assert(getNumOperands() == 1 && "must have a single operand");
94509a29fccSFlorian Hahn   VPValue *Exiting = getOperand(0);
94609a29fccSFlorian Hahn   if (!Exiting->isLiveIn()) {
94709a29fccSFlorian Hahn     LLVMContext &Ctx = getInstruction().getContext();
94809a29fccSFlorian Hahn     auto &Plan = *getParent()->getPlan();
94909a29fccSFlorian Hahn     Exiting = Builder.createNaryOp(
95009a29fccSFlorian Hahn         VPInstruction::ExtractFromEnd,
95109a29fccSFlorian Hahn         {Exiting,
95209a29fccSFlorian Hahn          Plan.getOrAddLiveIn(ConstantInt::get(IntegerType::get(Ctx, 32), 1))});
95309a29fccSFlorian Hahn   }
95409a29fccSFlorian Hahn   setOperand(0, Exiting);
95509a29fccSFlorian Hahn }
95609a29fccSFlorian Hahn 
957f0c5caa8SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
958f0c5caa8SFlorian Hahn void VPIRInstruction::print(raw_ostream &O, const Twine &Indent,
959f0c5caa8SFlorian Hahn                             VPSlotTracker &SlotTracker) const {
960f0c5caa8SFlorian Hahn   O << Indent << "IR " << I;
961f0c5caa8SFlorian Hahn 
962f0c5caa8SFlorian Hahn   if (getNumOperands() != 0) {
9635fae408dSFlorian Hahn     O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";
9645fae408dSFlorian Hahn     interleaveComma(
9655fae408dSFlorian Hahn         enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) {
9665fae408dSFlorian Hahn           Op.value()->printAsOperand(O, SlotTracker);
967e2519b67SFlorian Hahn           O << " from ";
9685fae408dSFlorian Hahn           getParent()->getPredecessors()[Op.index()]->printAsOperand(O);
9695fae408dSFlorian Hahn         });
970f0c5caa8SFlorian Hahn     O << ")";
971f0c5caa8SFlorian Hahn   }
972f0c5caa8SFlorian Hahn }
973f0c5caa8SFlorian Hahn #endif
974f0c5caa8SFlorian Hahn 
975408ebe5eSFlorian Hahn void VPWidenCallRecipe::execute(VPTransformState &State) {
9768bd02e5aSFlorian Hahn   assert(State.VF.isVector() && "not widening");
97742fb1facSFlorian Hahn   State.setDebugLocFrom(getDebugLoc());
978408ebe5eSFlorian Hahn 
9796fbbe152SFlorian Hahn   FunctionType *VFTy = Variant->getFunctionType();
980593e25ffSJay Foad   // Add return type if intrinsic is overloaded on it.
981408ebe5eSFlorian Hahn   SmallVector<Value *, 4> Args;
982e846778eSFlorian Hahn   for (const auto &I : enumerate(arg_operands())) {
983408ebe5eSFlorian Hahn     Value *Arg;
984d4c01714SGraham Hunter     // Some vectorized function variants may also take a scalar argument,
985d4c01714SGraham Hunter     // e.g. linear parameters for pointers. This needs to be the scalar value
986d4c01714SGraham Hunter     // from the start of the respective part when interleaving.
9876fbbe152SFlorian Hahn     if (!VFTy->getParamType(I.index())->isVectorTy())
988aae7ac66SFlorian Hahn       Arg = State.get(I.value(), VPLane(0));
9894d64a2bcSGraham Hunter     else
9906fbbe152SFlorian Hahn       Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
9916fbbe152SFlorian Hahn     Args.push_back(Arg);
9926fbbe152SFlorian Hahn   }
9936fbbe152SFlorian Hahn 
9946fbbe152SFlorian Hahn   assert(Variant != nullptr && "Can't create vector function.");
9956fbbe152SFlorian Hahn 
9966fbbe152SFlorian Hahn   auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
9976fbbe152SFlorian Hahn   SmallVector<OperandBundleDef, 1> OpBundles;
9986fbbe152SFlorian Hahn   if (CI)
9996fbbe152SFlorian Hahn     CI->getOperandBundlesAsDefs(OpBundles);
10006fbbe152SFlorian Hahn 
10016fbbe152SFlorian Hahn   CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles);
10026fbbe152SFlorian Hahn   setFlags(V);
10036fbbe152SFlorian Hahn 
10046fbbe152SFlorian Hahn   if (!V->getType()->isVoidTy())
10056fbbe152SFlorian Hahn     State.set(this, V);
10066fbbe152SFlorian Hahn   State.addMetadata(V, CI);
10076fbbe152SFlorian Hahn }
10086fbbe152SFlorian Hahn 
10096fbbe152SFlorian Hahn InstructionCost VPWidenCallRecipe::computeCost(ElementCount VF,
10106fbbe152SFlorian Hahn                                                VPCostContext &Ctx) const {
10116fbbe152SFlorian Hahn   return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(),
10126fbbe152SFlorian Hahn                                   Variant->getFunctionType()->params(),
1013edf3a55bSJohn Brawn                                   Ctx.CostKind);
10146fbbe152SFlorian Hahn }
10156fbbe152SFlorian Hahn 
10166fbbe152SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
10176fbbe152SFlorian Hahn void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
10186fbbe152SFlorian Hahn                               VPSlotTracker &SlotTracker) const {
10196fbbe152SFlorian Hahn   O << Indent << "WIDEN-CALL ";
10206fbbe152SFlorian Hahn 
10216fbbe152SFlorian Hahn   Function *CalledFn = getCalledScalarFunction();
10226fbbe152SFlorian Hahn   if (CalledFn->getReturnType()->isVoidTy())
10236fbbe152SFlorian Hahn     O << "void ";
10246fbbe152SFlorian Hahn   else {
10256fbbe152SFlorian Hahn     printAsOperand(O, SlotTracker);
10266fbbe152SFlorian Hahn     O << " = ";
10276fbbe152SFlorian Hahn   }
10286fbbe152SFlorian Hahn 
10296fbbe152SFlorian Hahn   O << "call";
10306fbbe152SFlorian Hahn   printFlags(O);
10316fbbe152SFlorian Hahn   O << " @" << CalledFn->getName() << "(";
10326fbbe152SFlorian Hahn   interleaveComma(arg_operands(), O, [&O, &SlotTracker](VPValue *Op) {
10336fbbe152SFlorian Hahn     Op->printAsOperand(O, SlotTracker);
10346fbbe152SFlorian Hahn   });
10356fbbe152SFlorian Hahn   O << ")";
10366fbbe152SFlorian Hahn 
10376fbbe152SFlorian Hahn   O << " (using library function";
10386fbbe152SFlorian Hahn   if (Variant->hasName())
10396fbbe152SFlorian Hahn     O << ": " << Variant->getName();
10406fbbe152SFlorian Hahn   O << ")";
10416fbbe152SFlorian Hahn }
10426fbbe152SFlorian Hahn #endif
10436fbbe152SFlorian Hahn 
10446fbbe152SFlorian Hahn void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
10456fbbe152SFlorian Hahn   assert(State.VF.isVector() && "not widening");
10466fbbe152SFlorian Hahn   State.setDebugLocFrom(getDebugLoc());
10476fbbe152SFlorian Hahn 
10486fbbe152SFlorian Hahn   SmallVector<Type *, 2> TysForDecl;
10496fbbe152SFlorian Hahn   // Add return type if intrinsic is overloaded on it.
10508663b877SFinn Plummer   if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))
10516fbbe152SFlorian Hahn     TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
10526fbbe152SFlorian Hahn   SmallVector<Value *, 4> Args;
10536fbbe152SFlorian Hahn   for (const auto &I : enumerate(operands())) {
10546fbbe152SFlorian Hahn     // Some intrinsics have a scalar argument - don't replace it with a
10556fbbe152SFlorian Hahn     // vector.
10566fbbe152SFlorian Hahn     Value *Arg;
105745c01e8aSFinn Plummer     if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(),
105845c01e8aSFinn Plummer                                            State.TTI))
10596fbbe152SFlorian Hahn       Arg = State.get(I.value(), VPLane(0));
10606fbbe152SFlorian Hahn     else
10616fbbe152SFlorian Hahn       Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
10628663b877SFinn Plummer     if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(),
10638663b877SFinn Plummer                                                State.TTI))
1064408ebe5eSFlorian Hahn       TysForDecl.push_back(Arg->getType());
1065408ebe5eSFlorian Hahn     Args.push_back(Arg);
1066408ebe5eSFlorian Hahn   }
1067408ebe5eSFlorian Hahn 
1068408ebe5eSFlorian Hahn   // Use vector version of the intrinsic.
1069408ebe5eSFlorian Hahn   Module *M = State.Builder.GetInsertBlock()->getModule();
10706fbbe152SFlorian Hahn   Function *VectorF =
1071fa789dffSRahul Joshi       Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
1072b759020cSLiqinWeng   assert(VectorF &&
1073b759020cSLiqinWeng          "Can't retrieve vector intrinsic or vector-predication intrinsics.");
10740fa5df19SGraham Hunter 
10756fbbe152SFlorian Hahn   auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
1076408ebe5eSFlorian Hahn   SmallVector<OperandBundleDef, 1> OpBundles;
1077e846778eSFlorian Hahn   if (CI)
1078e846778eSFlorian Hahn     CI->getOperandBundlesAsDefs(OpBundles);
1079e846778eSFlorian Hahn 
1080408ebe5eSFlorian Hahn   CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
1081408ebe5eSFlorian Hahn 
10820344123fSFlorian Hahn   setFlags(V);
1083408ebe5eSFlorian Hahn 
1084cd160a6eSFlorian Hahn   if (!V->getType()->isVoidTy())
108557f5d8f2SFlorian Hahn     State.set(this, V);
1086e846778eSFlorian Hahn   State.addMetadata(V, CI);
1087408ebe5eSFlorian Hahn }
1088408ebe5eSFlorian Hahn 
10896fbbe152SFlorian Hahn InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
10909ccf8254SFlorian Hahn                                                     VPCostContext &Ctx) const {
1091b0de7fa4SFlorian Hahn   // Some backends analyze intrinsic arguments to determine cost. Use the
1092b0de7fa4SFlorian Hahn   // underlying value for the operand if it has one. Otherwise try to use the
1093b0de7fa4SFlorian Hahn   // operand of the underlying call instruction, if there is one. Otherwise
1094b0de7fa4SFlorian Hahn   // clear Arguments.
1095b0de7fa4SFlorian Hahn   // TODO: Rework TTI interface to be independent of concrete IR values.
10969ccf8254SFlorian Hahn   SmallVector<const Value *> Arguments;
1097b0de7fa4SFlorian Hahn   for (const auto &[Idx, Op] : enumerate(operands())) {
10989ccf8254SFlorian Hahn     auto *V = Op->getUnderlyingValue();
10999ccf8254SFlorian Hahn     if (!V) {
11004a3f46deSLiqinWeng       // Push all the VP Intrinsic's ops into the Argments even if is nullptr.
11014a3f46deSLiqinWeng       // Some VP Intrinsic's cost will assert the number of parameters.
11024a3f46deSLiqinWeng       // Mainly appears in the following two scenarios:
11034a3f46deSLiqinWeng       // 1. EVL Op is nullptr
11044a3f46deSLiqinWeng       // 2. The Argmunt of the VP Intrinsic is also the VP Intrinsic
11054a3f46deSLiqinWeng       if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {
11064a3f46deSLiqinWeng         Arguments.push_back(V);
11074a3f46deSLiqinWeng         continue;
11084a3f46deSLiqinWeng       }
1109b0de7fa4SFlorian Hahn       if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) {
1110b0de7fa4SFlorian Hahn         Arguments.push_back(UI->getArgOperand(Idx));
1111b0de7fa4SFlorian Hahn         continue;
1112b0de7fa4SFlorian Hahn       }
11139ccf8254SFlorian Hahn       Arguments.clear();
11149ccf8254SFlorian Hahn       break;
11159ccf8254SFlorian Hahn     }
11169ccf8254SFlorian Hahn     Arguments.push_back(V);
11179ccf8254SFlorian Hahn   }
11189ccf8254SFlorian Hahn 
11199ab5474eSBenjamin Maxwell   Type *RetTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
11209ccf8254SFlorian Hahn   SmallVector<Type *> ParamTys;
11219ccf8254SFlorian Hahn   for (unsigned I = 0; I != getNumOperands(); ++I)
11229ccf8254SFlorian Hahn     ParamTys.push_back(
11239ab5474eSBenjamin Maxwell         toVectorTy(Ctx.Types.inferScalarType(getOperand(I)), VF));
11249ccf8254SFlorian Hahn 
112550a02e7cSFlorian Hahn   // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
11260344123fSFlorian Hahn   FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
112750a02e7cSFlorian Hahn   IntrinsicCostAttributes CostAttrs(
112850a02e7cSFlorian Hahn       VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF,
112950a02e7cSFlorian Hahn       dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()));
1130edf3a55bSJohn Brawn   return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, Ctx.CostKind);
11319ccf8254SFlorian Hahn }
11329ccf8254SFlorian Hahn 
11336fbbe152SFlorian Hahn StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const {
11346fbbe152SFlorian Hahn   return Intrinsic::getBaseName(VectorIntrinsicID);
11356fbbe152SFlorian Hahn }
113603975b7fSFlorian Hahn 
113734cdd67cSFlorian Hahn bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
113834cdd67cSFlorian Hahn   assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
113934cdd67cSFlorian Hahn   // Vector predication intrinsics only demand the the first lane the last
114034cdd67cSFlorian Hahn   // operand (the EVL operand).
114134cdd67cSFlorian Hahn   return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
114234cdd67cSFlorian Hahn          Op == getOperand(getNumOperands() - 1);
114334cdd67cSFlorian Hahn }
114434cdd67cSFlorian Hahn 
11456fbbe152SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
11466fbbe152SFlorian Hahn void VPWidenIntrinsicRecipe::print(raw_ostream &O, const Twine &Indent,
11476fbbe152SFlorian Hahn                                    VPSlotTracker &SlotTracker) const {
11486fbbe152SFlorian Hahn   O << Indent << "WIDEN-INTRINSIC ";
11496fbbe152SFlorian Hahn   if (ResultTy->isVoidTy()) {
115003975b7fSFlorian Hahn     O << "void ";
11516fbbe152SFlorian Hahn   } else {
115203975b7fSFlorian Hahn     printAsOperand(O, SlotTracker);
115303975b7fSFlorian Hahn     O << " = ";
115403975b7fSFlorian Hahn   }
115503975b7fSFlorian Hahn 
11560344123fSFlorian Hahn   O << "call";
11570344123fSFlorian Hahn   printFlags(O);
11586fbbe152SFlorian Hahn   O << getIntrinsicName() << "(";
11596fbbe152SFlorian Hahn 
11606fbbe152SFlorian Hahn   interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1161e846778eSFlorian Hahn     Op->printAsOperand(O, SlotTracker);
1162e846778eSFlorian Hahn   });
116303975b7fSFlorian Hahn   O << ")";
116403975b7fSFlorian Hahn }
11656f1a8c2dSGraham Hunter #endif
11666f1a8c2dSGraham Hunter 
11676f1a8c2dSGraham Hunter void VPHistogramRecipe::execute(VPTransformState &State) {
11686f1a8c2dSGraham Hunter   State.setDebugLocFrom(getDebugLoc());
11696f1a8c2dSGraham Hunter   IRBuilderBase &Builder = State.Builder;
11706f1a8c2dSGraham Hunter 
11716f1a8c2dSGraham Hunter   Value *Address = State.get(getOperand(0));
11726f1a8c2dSGraham Hunter   Value *IncAmt = State.get(getOperand(1), /*IsScalar=*/true);
11736f1a8c2dSGraham Hunter   VectorType *VTy = cast<VectorType>(Address->getType());
11746f1a8c2dSGraham Hunter 
11756f1a8c2dSGraham Hunter   // The histogram intrinsic requires a mask even if the recipe doesn't;
11766f1a8c2dSGraham Hunter   // if the mask operand was omitted then all lanes should be executed and
11776f1a8c2dSGraham Hunter   // we just need to synthesize an all-true mask.
11786f1a8c2dSGraham Hunter   Value *Mask = nullptr;
11796f1a8c2dSGraham Hunter   if (VPValue *VPMask = getMask())
11806f1a8c2dSGraham Hunter     Mask = State.get(VPMask);
11816f1a8c2dSGraham Hunter   else
11826f1a8c2dSGraham Hunter     Mask =
11836f1a8c2dSGraham Hunter         Builder.CreateVectorSplat(VTy->getElementCount(), Builder.getInt1(1));
11846f1a8c2dSGraham Hunter 
11856f1a8c2dSGraham Hunter   // If this is a subtract, we want to invert the increment amount. We may
11866f1a8c2dSGraham Hunter   // add a separate intrinsic in future, but for now we'll try this.
11876f1a8c2dSGraham Hunter   if (Opcode == Instruction::Sub)
11886f1a8c2dSGraham Hunter     IncAmt = Builder.CreateNeg(IncAmt);
11896f1a8c2dSGraham Hunter   else
11906f1a8c2dSGraham Hunter     assert(Opcode == Instruction::Add && "only add or sub supported for now");
11916f1a8c2dSGraham Hunter 
11926f1a8c2dSGraham Hunter   State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
11936f1a8c2dSGraham Hunter                                 {VTy, IncAmt->getType()},
11946f1a8c2dSGraham Hunter                                 {Address, IncAmt, Mask});
11956f1a8c2dSGraham Hunter }
11966f1a8c2dSGraham Hunter 
11976f1a8c2dSGraham Hunter InstructionCost VPHistogramRecipe::computeCost(ElementCount VF,
11986f1a8c2dSGraham Hunter                                                VPCostContext &Ctx) const {
11996f1a8c2dSGraham Hunter   // FIXME: Take the gather and scatter into account as well. For now we're
12006f1a8c2dSGraham Hunter   //        generating the same cost as the fallback path, but we'll likely
12016f1a8c2dSGraham Hunter   //        need to create a new TTI method for determining the cost, including
12026f1a8c2dSGraham Hunter   //        whether we can use base + vec-of-smaller-indices or just
12036f1a8c2dSGraham Hunter   //        vec-of-pointers.
12046f1a8c2dSGraham Hunter   assert(VF.isVector() && "Invalid VF for histogram cost");
12056f1a8c2dSGraham Hunter   Type *AddressTy = Ctx.Types.inferScalarType(getOperand(0));
12066f1a8c2dSGraham Hunter   VPValue *IncAmt = getOperand(1);
12076f1a8c2dSGraham Hunter   Type *IncTy = Ctx.Types.inferScalarType(IncAmt);
12086f1a8c2dSGraham Hunter   VectorType *VTy = VectorType::get(IncTy, VF);
12096f1a8c2dSGraham Hunter 
12106f1a8c2dSGraham Hunter   // Assume that a non-constant update value (or a constant != 1) requires
12116f1a8c2dSGraham Hunter   // a multiply, and add that into the cost.
12126f1a8c2dSGraham Hunter   InstructionCost MulCost =
1213edf3a55bSJohn Brawn       Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy, Ctx.CostKind);
12146f1a8c2dSGraham Hunter   if (IncAmt->isLiveIn()) {
12156f1a8c2dSGraham Hunter     ConstantInt *CI = dyn_cast<ConstantInt>(IncAmt->getLiveInIRValue());
12166f1a8c2dSGraham Hunter 
12176f1a8c2dSGraham Hunter     if (CI && CI->getZExtValue() == 1)
12186f1a8c2dSGraham Hunter       MulCost = TTI::TCC_Free;
12196f1a8c2dSGraham Hunter   }
12206f1a8c2dSGraham Hunter 
12216f1a8c2dSGraham Hunter   // Find the cost of the histogram operation itself.
12226f1a8c2dSGraham Hunter   Type *PtrTy = VectorType::get(AddressTy, VF);
12236f1a8c2dSGraham Hunter   Type *MaskTy = VectorType::get(Type::getInt1Ty(Ctx.LLVMCtx), VF);
12246f1a8c2dSGraham Hunter   IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add,
12256f1a8c2dSGraham Hunter                               Type::getVoidTy(Ctx.LLVMCtx),
12266f1a8c2dSGraham Hunter                               {PtrTy, IncTy, MaskTy});
12276f1a8c2dSGraham Hunter 
12286f1a8c2dSGraham Hunter   // Add the costs together with the add/sub operation.
1229edf3a55bSJohn Brawn   return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind) + MulCost +
1230edf3a55bSJohn Brawn          Ctx.TTI.getArithmeticInstrCost(Opcode, VTy, Ctx.CostKind);
12316f1a8c2dSGraham Hunter }
12326f1a8c2dSGraham Hunter 
12336f1a8c2dSGraham Hunter #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
12346f1a8c2dSGraham Hunter void VPHistogramRecipe::print(raw_ostream &O, const Twine &Indent,
12356f1a8c2dSGraham Hunter                               VPSlotTracker &SlotTracker) const {
12366f1a8c2dSGraham Hunter   O << Indent << "WIDEN-HISTOGRAM buckets: ";
12376f1a8c2dSGraham Hunter   getOperand(0)->printAsOperand(O, SlotTracker);
12386f1a8c2dSGraham Hunter 
12396f1a8c2dSGraham Hunter   if (Opcode == Instruction::Sub)
12406f1a8c2dSGraham Hunter     O << ", dec: ";
12416f1a8c2dSGraham Hunter   else {
12426f1a8c2dSGraham Hunter     assert(Opcode == Instruction::Add);
12436f1a8c2dSGraham Hunter     O << ", inc: ";
12446f1a8c2dSGraham Hunter   }
12456f1a8c2dSGraham Hunter   getOperand(1)->printAsOperand(O, SlotTracker);
12466f1a8c2dSGraham Hunter 
12476f1a8c2dSGraham Hunter   if (VPValue *Mask = getMask()) {
12486f1a8c2dSGraham Hunter     O << ", mask: ";
12496f1a8c2dSGraham Hunter     Mask->printAsOperand(O, SlotTracker);
12506f1a8c2dSGraham Hunter   }
12516f1a8c2dSGraham Hunter }
125203975b7fSFlorian Hahn 
125303975b7fSFlorian Hahn void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
125403975b7fSFlorian Hahn                                 VPSlotTracker &SlotTracker) const {
125503975b7fSFlorian Hahn   O << Indent << "WIDEN-SELECT ";
125603975b7fSFlorian Hahn   printAsOperand(O, SlotTracker);
125703975b7fSFlorian Hahn   O << " = select ";
12580294dab7SLiqinWeng   printFlags(O);
125903975b7fSFlorian Hahn   getOperand(0)->printAsOperand(O, SlotTracker);
126003975b7fSFlorian Hahn   O << ", ";
126103975b7fSFlorian Hahn   getOperand(1)->printAsOperand(O, SlotTracker);
126203975b7fSFlorian Hahn   O << ", ";
126303975b7fSFlorian Hahn   getOperand(2)->printAsOperand(O, SlotTracker);
126454558fd8SFlorian Hahn   O << (isInvariantCond() ? " (condition is loop invariant)" : "");
126503975b7fSFlorian Hahn }
12660c27b388SFlorian Hahn #endif
126703975b7fSFlorian Hahn 
12680c27b388SFlorian Hahn void VPWidenSelectRecipe::execute(VPTransformState &State) {
1269165e24aaSFlorian Hahn   State.setDebugLocFrom(getDebugLoc());
12700c27b388SFlorian Hahn 
12710c27b388SFlorian Hahn   // The condition can be loop invariant but still defined inside the
12720c27b388SFlorian Hahn   // loop. This means that we can't just use the original 'cond' value.
12730c27b388SFlorian Hahn   // We have to take the 'vectorized' value and pick the first lane.
12740c27b388SFlorian Hahn   // Instcombine will make this a no-op.
12750c27b388SFlorian Hahn   auto *InvarCond =
1276aae7ac66SFlorian Hahn       isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr;
12770c27b388SFlorian Hahn 
127857f5d8f2SFlorian Hahn   Value *Cond = InvarCond ? InvarCond : State.get(getCond());
127957f5d8f2SFlorian Hahn   Value *Op0 = State.get(getOperand(1));
128057f5d8f2SFlorian Hahn   Value *Op1 = State.get(getOperand(2));
12810c27b388SFlorian Hahn   Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
128257f5d8f2SFlorian Hahn   State.set(this, Sel);
12830294dab7SLiqinWeng   if (isa<FPMathOperator>(Sel))
12840294dab7SLiqinWeng     setFlags(cast<Instruction>(Sel));
1285165e24aaSFlorian Hahn   State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
12860c27b388SFlorian Hahn }
12870c27b388SFlorian Hahn 
12881d9b3222SFlorian Hahn InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF,
12891d9b3222SFlorian Hahn                                                  VPCostContext &Ctx) const {
12901d9b3222SFlorian Hahn   SelectInst *SI = cast<SelectInst>(getUnderlyingValue());
12911d9b3222SFlorian Hahn   bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();
12921d9b3222SFlorian Hahn   Type *ScalarTy = Ctx.Types.inferScalarType(this);
12939ab5474eSBenjamin Maxwell   Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
12941d9b3222SFlorian Hahn 
12951d9b3222SFlorian Hahn   VPValue *Op0, *Op1;
12961d9b3222SFlorian Hahn   using namespace llvm::VPlanPatternMatch;
12971d9b3222SFlorian Hahn   if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
12981d9b3222SFlorian Hahn       (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||
12991d9b3222SFlorian Hahn        match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {
13001d9b3222SFlorian Hahn     // select x, y, false --> x & y
13011d9b3222SFlorian Hahn     // select x, true, y --> x | y
13021d9b3222SFlorian Hahn     const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);
13031d9b3222SFlorian Hahn     const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);
13041d9b3222SFlorian Hahn 
13051d9b3222SFlorian Hahn     SmallVector<const Value *, 2> Operands;
13061d9b3222SFlorian Hahn     if (all_of(operands(),
13071d9b3222SFlorian Hahn                [](VPValue *Op) { return Op->getUnderlyingValue(); }))
13081d9b3222SFlorian Hahn       Operands.append(SI->op_begin(), SI->op_end());
13091d9b3222SFlorian Hahn     bool IsLogicalOr = match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1)));
13101d9b3222SFlorian Hahn     return Ctx.TTI.getArithmeticInstrCost(
1311edf3a55bSJohn Brawn         IsLogicalOr ? Instruction::Or : Instruction::And, VectorTy,
1312edf3a55bSJohn Brawn         Ctx.CostKind, {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI);
13131d9b3222SFlorian Hahn   }
13141d9b3222SFlorian Hahn 
13151d9b3222SFlorian Hahn   Type *CondTy = Ctx.Types.inferScalarType(getOperand(0));
13161d9b3222SFlorian Hahn   if (!ScalarCond)
13171d9b3222SFlorian Hahn     CondTy = VectorType::get(CondTy, VF);
13181d9b3222SFlorian Hahn 
13191d9b3222SFlorian Hahn   CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
13201d9b3222SFlorian Hahn   if (auto *Cmp = dyn_cast<CmpInst>(SI->getCondition()))
13211d9b3222SFlorian Hahn     Pred = Cmp->getPredicate();
1322edf3a55bSJohn Brawn   return Ctx.TTI.getCmpSelInstrCost(
1323edf3a55bSJohn Brawn       Instruction::Select, VectorTy, CondTy, Pred, Ctx.CostKind,
1324edf3a55bSJohn Brawn       {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, SI);
13251d9b3222SFlorian Hahn }
13261d9b3222SFlorian Hahn 
1327698ae660SFlorian Hahn VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
1328698ae660SFlorian Hahn     const FastMathFlags &FMF) {
1329698ae660SFlorian Hahn   AllowReassoc = FMF.allowReassoc();
1330698ae660SFlorian Hahn   NoNaNs = FMF.noNaNs();
1331698ae660SFlorian Hahn   NoInfs = FMF.noInfs();
1332698ae660SFlorian Hahn   NoSignedZeros = FMF.noSignedZeros();
1333698ae660SFlorian Hahn   AllowReciprocal = FMF.allowReciprocal();
1334698ae660SFlorian Hahn   AllowContract = FMF.allowContract();
1335698ae660SFlorian Hahn   ApproxFunc = FMF.approxFunc();
1336698ae660SFlorian Hahn }
1337698ae660SFlorian Hahn 
1338299f0ff6SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1339299f0ff6SFlorian Hahn void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
1340299f0ff6SFlorian Hahn   switch (OpType) {
1341fd661957SFlorian Hahn   case OperationType::Cmp:
1342fd661957SFlorian Hahn     O << " " << CmpInst::getPredicateName(getPredicate());
1343fd661957SFlorian Hahn     break;
1344bbd1941aSFlorian Hahn   case OperationType::DisjointOp:
1345bbd1941aSFlorian Hahn     if (DisjointFlags.IsDisjoint)
1346bbd1941aSFlorian Hahn       O << " disjoint";
1347bbd1941aSFlorian Hahn     break;
1348299f0ff6SFlorian Hahn   case OperationType::PossiblyExactOp:
1349299f0ff6SFlorian Hahn     if (ExactFlags.IsExact)
1350299f0ff6SFlorian Hahn       O << " exact";
1351299f0ff6SFlorian Hahn     break;
1352299f0ff6SFlorian Hahn   case OperationType::OverflowingBinOp:
1353299f0ff6SFlorian Hahn     if (WrapFlags.HasNUW)
1354299f0ff6SFlorian Hahn       O << " nuw";
1355299f0ff6SFlorian Hahn     if (WrapFlags.HasNSW)
1356299f0ff6SFlorian Hahn       O << " nsw";
1357299f0ff6SFlorian Hahn     break;
1358299f0ff6SFlorian Hahn   case OperationType::FPMathOp:
1359299f0ff6SFlorian Hahn     getFastMathFlags().print(O);
1360299f0ff6SFlorian Hahn     break;
1361299f0ff6SFlorian Hahn   case OperationType::GEPOp:
136211571874SNikita Popov     if (GEPFlags.isInBounds())
1363299f0ff6SFlorian Hahn       O << " inbounds";
136411571874SNikita Popov     else if (GEPFlags.hasNoUnsignedSignedWrap())
136511571874SNikita Popov       O << " nusw";
136611571874SNikita Popov     if (GEPFlags.hasNoUnsignedWrap())
136711571874SNikita Popov       O << " nuw";
1368299f0ff6SFlorian Hahn     break;
1369056367bbSAlexey Bataev   case OperationType::NonNegOp:
1370056367bbSAlexey Bataev     if (NonNegFlags.NonNeg)
1371056367bbSAlexey Bataev       O << " nneg";
1372056367bbSAlexey Bataev     break;
1373299f0ff6SFlorian Hahn   case OperationType::Other:
1374299f0ff6SFlorian Hahn     break;
1375299f0ff6SFlorian Hahn   }
1376af635a55SFlorian Hahn   if (getNumOperands() > 0)
1377299f0ff6SFlorian Hahn     O << " ";
1378299f0ff6SFlorian Hahn }
1379299f0ff6SFlorian Hahn #endif
1380299f0ff6SFlorian Hahn 
138113ae2134SFlorian Hahn void VPWidenRecipe::execute(VPTransformState &State) {
1382165e24aaSFlorian Hahn   State.setDebugLocFrom(getDebugLoc());
138313ae2134SFlorian Hahn   auto &Builder = State.Builder;
1384785e7063SFlorian Hahn   switch (Opcode) {
138513ae2134SFlorian Hahn   case Instruction::Call:
138613ae2134SFlorian Hahn   case Instruction::Br:
138713ae2134SFlorian Hahn   case Instruction::PHI:
138813ae2134SFlorian Hahn   case Instruction::GetElementPtr:
138913ae2134SFlorian Hahn   case Instruction::Select:
139013ae2134SFlorian Hahn     llvm_unreachable("This instruction is handled by a different recipe.");
139113ae2134SFlorian Hahn   case Instruction::UDiv:
139213ae2134SFlorian Hahn   case Instruction::SDiv:
139313ae2134SFlorian Hahn   case Instruction::SRem:
139413ae2134SFlorian Hahn   case Instruction::URem:
139513ae2134SFlorian Hahn   case Instruction::Add:
139613ae2134SFlorian Hahn   case Instruction::FAdd:
139713ae2134SFlorian Hahn   case Instruction::Sub:
139813ae2134SFlorian Hahn   case Instruction::FSub:
139913ae2134SFlorian Hahn   case Instruction::FNeg:
140013ae2134SFlorian Hahn   case Instruction::Mul:
140113ae2134SFlorian Hahn   case Instruction::FMul:
140213ae2134SFlorian Hahn   case Instruction::FDiv:
140313ae2134SFlorian Hahn   case Instruction::FRem:
140413ae2134SFlorian Hahn   case Instruction::Shl:
140513ae2134SFlorian Hahn   case Instruction::LShr:
140613ae2134SFlorian Hahn   case Instruction::AShr:
140713ae2134SFlorian Hahn   case Instruction::And:
140813ae2134SFlorian Hahn   case Instruction::Or:
140913ae2134SFlorian Hahn   case Instruction::Xor: {
141013ae2134SFlorian Hahn     // Just widen unops and binops.
141113ae2134SFlorian Hahn     SmallVector<Value *, 2> Ops;
141213ae2134SFlorian Hahn     for (VPValue *VPOp : operands())
141357f5d8f2SFlorian Hahn       Ops.push_back(State.get(VPOp));
141413ae2134SFlorian Hahn 
1415785e7063SFlorian Hahn     Value *V = Builder.CreateNAryOp(Opcode, Ops);
141613ae2134SFlorian Hahn 
1417127b00b2SFlorian Hahn     if (auto *VecOp = dyn_cast<Instruction>(V))
1418127b00b2SFlorian Hahn       setFlags(VecOp);
141913ae2134SFlorian Hahn 
142013ae2134SFlorian Hahn     // Use this vector value for all users of the original instruction.
142157f5d8f2SFlorian Hahn     State.set(this, V);
1422785e7063SFlorian Hahn     State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
142313ae2134SFlorian Hahn     break;
142413ae2134SFlorian Hahn   }
142513ae2134SFlorian Hahn   case Instruction::Freeze: {
142657f5d8f2SFlorian Hahn     Value *Op = State.get(getOperand(0));
142713ae2134SFlorian Hahn 
142813ae2134SFlorian Hahn     Value *Freeze = Builder.CreateFreeze(Op);
142957f5d8f2SFlorian Hahn     State.set(this, Freeze);
143013ae2134SFlorian Hahn     break;
143113ae2134SFlorian Hahn   }
143213ae2134SFlorian Hahn   case Instruction::ICmp:
143313ae2134SFlorian Hahn   case Instruction::FCmp: {
143413ae2134SFlorian Hahn     // Widen compares. Generate vector compares.
1435785e7063SFlorian Hahn     bool FCmp = Opcode == Instruction::FCmp;
143657f5d8f2SFlorian Hahn     Value *A = State.get(getOperand(0));
143757f5d8f2SFlorian Hahn     Value *B = State.get(getOperand(1));
143813ae2134SFlorian Hahn     Value *C = nullptr;
143913ae2134SFlorian Hahn     if (FCmp) {
144013ae2134SFlorian Hahn       // Propagate fast math flags.
1441a77346baSYingwei Zheng       C = Builder.CreateFCmpFMF(
1442a77346baSYingwei Zheng           getPredicate(), A, B,
1443a77346baSYingwei Zheng           dyn_cast_or_null<Instruction>(getUnderlyingValue()));
144413ae2134SFlorian Hahn     } else {
1445785e7063SFlorian Hahn       C = Builder.CreateICmp(getPredicate(), A, B);
144613ae2134SFlorian Hahn     }
144757f5d8f2SFlorian Hahn     State.set(this, C);
1448785e7063SFlorian Hahn     State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
144913ae2134SFlorian Hahn     break;
145013ae2134SFlorian Hahn   }
145113ae2134SFlorian Hahn   default:
145213ae2134SFlorian Hahn     // This instruction is not vectorized by simple widening.
1453785e7063SFlorian Hahn     LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
1454785e7063SFlorian Hahn                       << Instruction::getOpcodeName(Opcode));
145513ae2134SFlorian Hahn     llvm_unreachable("Unhandled instruction!");
145613ae2134SFlorian Hahn   } // end of switch.
1457b0b88643SFlorian Hahn 
1458b0b88643SFlorian Hahn #if !defined(NDEBUG)
1459b0b88643SFlorian Hahn   // Verify that VPlan type inference results agree with the type of the
1460b0b88643SFlorian Hahn   // generated values.
146106c3a7d2SFlorian Hahn   assert(VectorType::get(State.TypeAnalysis.inferScalarType(this), State.VF) ==
146257f5d8f2SFlorian Hahn              State.get(this)->getType() &&
1463b0b88643SFlorian Hahn          "inferred type and type from generated instructions do not match");
1464b0b88643SFlorian Hahn #endif
1465b0b88643SFlorian Hahn }
1466b0b88643SFlorian Hahn 
14671aa8a6f6SFlorian Hahn InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
14681aa8a6f6SFlorian Hahn                                            VPCostContext &Ctx) const {
14691aa8a6f6SFlorian Hahn   switch (Opcode) {
14701aa8a6f6SFlorian Hahn   case Instruction::FNeg: {
14719ab5474eSBenjamin Maxwell     Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
14721aa8a6f6SFlorian Hahn     return Ctx.TTI.getArithmeticInstrCost(
1473edf3a55bSJohn Brawn         Opcode, VectorTy, Ctx.CostKind,
14741aa8a6f6SFlorian Hahn         {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
14751aa8a6f6SFlorian Hahn         {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None});
14761aa8a6f6SFlorian Hahn   }
14771aa8a6f6SFlorian Hahn 
14781aa8a6f6SFlorian Hahn   case Instruction::UDiv:
14791aa8a6f6SFlorian Hahn   case Instruction::SDiv:
14801aa8a6f6SFlorian Hahn   case Instruction::SRem:
14811aa8a6f6SFlorian Hahn   case Instruction::URem:
14821aa8a6f6SFlorian Hahn     // More complex computation, let the legacy cost-model handle this for now.
14831aa8a6f6SFlorian Hahn     return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
14841aa8a6f6SFlorian Hahn   case Instruction::Add:
14851aa8a6f6SFlorian Hahn   case Instruction::FAdd:
14861aa8a6f6SFlorian Hahn   case Instruction::Sub:
14871aa8a6f6SFlorian Hahn   case Instruction::FSub:
14881aa8a6f6SFlorian Hahn   case Instruction::Mul:
14891aa8a6f6SFlorian Hahn   case Instruction::FMul:
14901aa8a6f6SFlorian Hahn   case Instruction::FDiv:
14911aa8a6f6SFlorian Hahn   case Instruction::FRem:
14921aa8a6f6SFlorian Hahn   case Instruction::Shl:
14931aa8a6f6SFlorian Hahn   case Instruction::LShr:
14941aa8a6f6SFlorian Hahn   case Instruction::AShr:
14951aa8a6f6SFlorian Hahn   case Instruction::And:
14961aa8a6f6SFlorian Hahn   case Instruction::Or:
14971aa8a6f6SFlorian Hahn   case Instruction::Xor: {
14981aa8a6f6SFlorian Hahn     VPValue *RHS = getOperand(1);
14991aa8a6f6SFlorian Hahn     // Certain instructions can be cheaper to vectorize if they have a constant
15001aa8a6f6SFlorian Hahn     // second vector operand. One example of this are shifts on x86.
15011aa8a6f6SFlorian Hahn     TargetTransformInfo::OperandValueInfo RHSInfo = {
15021aa8a6f6SFlorian Hahn         TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None};
15031aa8a6f6SFlorian Hahn     if (RHS->isLiveIn())
15041aa8a6f6SFlorian Hahn       RHSInfo = Ctx.TTI.getOperandInfo(RHS->getLiveInIRValue());
15051aa8a6f6SFlorian Hahn 
15061aa8a6f6SFlorian Hahn     if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
150725610048SFlorian Hahn         getOperand(1)->isDefinedOutsideLoopRegions())
15081aa8a6f6SFlorian Hahn       RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;
15099ab5474eSBenjamin Maxwell     Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
15101aa8a6f6SFlorian Hahn     Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
15111aa8a6f6SFlorian Hahn 
15121aa8a6f6SFlorian Hahn     SmallVector<const Value *, 4> Operands;
15131aa8a6f6SFlorian Hahn     if (CtxI)
15141aa8a6f6SFlorian Hahn       Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
15151aa8a6f6SFlorian Hahn     return Ctx.TTI.getArithmeticInstrCost(
1516edf3a55bSJohn Brawn         Opcode, VectorTy, Ctx.CostKind,
15171aa8a6f6SFlorian Hahn         {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
15181aa8a6f6SFlorian Hahn         RHSInfo, Operands, CtxI, &Ctx.TLI);
15191aa8a6f6SFlorian Hahn   }
15201aa8a6f6SFlorian Hahn   case Instruction::Freeze: {
15211aa8a6f6SFlorian Hahn     // This opcode is unknown. Assume that it is the same as 'mul'.
15229ab5474eSBenjamin Maxwell     Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1523edf3a55bSJohn Brawn     return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy,
1524edf3a55bSJohn Brawn                                           Ctx.CostKind);
15251aa8a6f6SFlorian Hahn   }
15261aa8a6f6SFlorian Hahn   case Instruction::ICmp:
15271aa8a6f6SFlorian Hahn   case Instruction::FCmp: {
15281aa8a6f6SFlorian Hahn     Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
15299ab5474eSBenjamin Maxwell     Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
15301aa8a6f6SFlorian Hahn     return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(),
1531edf3a55bSJohn Brawn                                       Ctx.CostKind,
1532d2885743SPhilip Reames                                       {TTI::OK_AnyValue, TTI::OP_None},
1533d2885743SPhilip Reames                                       {TTI::OK_AnyValue, TTI::OP_None}, CtxI);
15341aa8a6f6SFlorian Hahn   }
15351aa8a6f6SFlorian Hahn   default:
15361aa8a6f6SFlorian Hahn     llvm_unreachable("Unsupported opcode for instruction");
15371aa8a6f6SFlorian Hahn   }
15381aa8a6f6SFlorian Hahn }
15391aa8a6f6SFlorian Hahn 
154000e40c9bSKolya Panchenko void VPWidenEVLRecipe::execute(VPTransformState &State) {
154100e40c9bSKolya Panchenko   unsigned Opcode = getOpcode();
154200e40c9bSKolya Panchenko   // TODO: Support other opcodes
154300e40c9bSKolya Panchenko   if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
154400e40c9bSKolya Panchenko     llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
154500e40c9bSKolya Panchenko 
154600e40c9bSKolya Panchenko   State.setDebugLocFrom(getDebugLoc());
154700e40c9bSKolya Panchenko 
154857f5d8f2SFlorian Hahn   assert(State.get(getOperand(0))->getType()->isVectorTy() &&
154900e40c9bSKolya Panchenko          "VPWidenEVLRecipe should not be used for scalars");
155000e40c9bSKolya Panchenko 
155100e40c9bSKolya Panchenko   VPValue *EVL = getEVL();
155257f5d8f2SFlorian Hahn   Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
155300e40c9bSKolya Panchenko   IRBuilderBase &BuilderIR = State.Builder;
155400e40c9bSKolya Panchenko   VectorBuilder Builder(BuilderIR);
155500e40c9bSKolya Panchenko   Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
155600e40c9bSKolya Panchenko 
155700e40c9bSKolya Panchenko   SmallVector<Value *, 4> Ops;
155800e40c9bSKolya Panchenko   for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
155900e40c9bSKolya Panchenko     VPValue *VPOp = getOperand(I);
156057f5d8f2SFlorian Hahn     Ops.push_back(State.get(VPOp));
156100e40c9bSKolya Panchenko   }
156200e40c9bSKolya Panchenko 
156300e40c9bSKolya Panchenko   Builder.setMask(Mask).setEVL(EVLArg);
156400e40c9bSKolya Panchenko   Value *VPInst =
156500e40c9bSKolya Panchenko       Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
156600e40c9bSKolya Panchenko   // Currently vp-intrinsics only accept FMF flags.
156700e40c9bSKolya Panchenko   // TODO: Enable other flags when support is added.
156800e40c9bSKolya Panchenko   if (isa<FPMathOperator>(VPInst))
156900e40c9bSKolya Panchenko     setFlags(cast<Instruction>(VPInst));
157000e40c9bSKolya Panchenko 
157157f5d8f2SFlorian Hahn   State.set(this, VPInst);
157200e40c9bSKolya Panchenko   State.addMetadata(VPInst,
157300e40c9bSKolya Panchenko                     dyn_cast_or_null<Instruction>(getUnderlyingValue()));
157400e40c9bSKolya Panchenko }
157500e40c9bSKolya Panchenko 
15760c27b388SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
157703975b7fSFlorian Hahn void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
157803975b7fSFlorian Hahn                           VPSlotTracker &SlotTracker) const {
157903975b7fSFlorian Hahn   O << Indent << "WIDEN ";
158003975b7fSFlorian Hahn   printAsOperand(O, SlotTracker);
1581785e7063SFlorian Hahn   O << " = " << Instruction::getOpcodeName(Opcode);
1582299f0ff6SFlorian Hahn   printFlags(O);
158303975b7fSFlorian Hahn   printOperands(O, SlotTracker);
158403975b7fSFlorian Hahn }
158500e40c9bSKolya Panchenko 
158600e40c9bSKolya Panchenko void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
158700e40c9bSKolya Panchenko                              VPSlotTracker &SlotTracker) const {
1588a2994b29SLiqinWeng   O << Indent << "WIDEN ";
158900e40c9bSKolya Panchenko   printAsOperand(O, SlotTracker);
1590a2994b29SLiqinWeng   O << " = vp." << Instruction::getOpcodeName(getOpcode());
159100e40c9bSKolya Panchenko   printFlags(O);
159200e40c9bSKolya Panchenko   printOperands(O, SlotTracker);
159300e40c9bSKolya Panchenko }
1594e3afe0b8SFlorian Hahn #endif
1595e3afe0b8SFlorian Hahn 
1596e3afe0b8SFlorian Hahn void VPWidenCastRecipe::execute(VPTransformState &State) {
1597165e24aaSFlorian Hahn   State.setDebugLocFrom(getDebugLoc());
1598e3afe0b8SFlorian Hahn   auto &Builder = State.Builder;
1599e3afe0b8SFlorian Hahn   /// Vectorize casts.
1600e3afe0b8SFlorian Hahn   assert(State.VF.isVector() && "Not vectorizing?");
1601e3afe0b8SFlorian Hahn   Type *DestTy = VectorType::get(getResultType(), State.VF);
160270535f5eSFlorian Hahn   VPValue *Op = getOperand(0);
160357f5d8f2SFlorian Hahn   Value *A = State.get(Op);
1604e3afe0b8SFlorian Hahn   Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
160557f5d8f2SFlorian Hahn   State.set(this, Cast);
1606165e24aaSFlorian Hahn   State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
16078af5ae06SNoah Goldstein   if (auto *CastOp = dyn_cast<Instruction>(Cast))
16088af5ae06SNoah Goldstein     setFlags(CastOp);
1609e3afe0b8SFlorian Hahn }
1610e3afe0b8SFlorian Hahn 
1611b3edc764SElvis Wang InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
1612b3edc764SElvis Wang                                                VPCostContext &Ctx) const {
1613e724226dSFlorian Hahn   // TODO: In some cases, VPWidenCastRecipes are created but not considered in
1614e724226dSFlorian Hahn   // the legacy cost model, including truncates/extends when evaluating a
1615e724226dSFlorian Hahn   // reduction in a smaller type.
1616e724226dSFlorian Hahn   if (!getUnderlyingValue())
1617e724226dSFlorian Hahn     return 0;
1618b3edc764SElvis Wang   // Computes the CastContextHint from a recipes that may access memory.
1619b3edc764SElvis Wang   auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
1620b3edc764SElvis Wang     if (VF.isScalar())
1621b3edc764SElvis Wang       return TTI::CastContextHint::Normal;
1622b3edc764SElvis Wang     if (isa<VPInterleaveRecipe>(R))
1623b3edc764SElvis Wang       return TTI::CastContextHint::Interleave;
1624b3edc764SElvis Wang     if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R))
1625b3edc764SElvis Wang       return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked
1626b3edc764SElvis Wang                                              : TTI::CastContextHint::Normal;
1627b3edc764SElvis Wang     const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);
1628b3edc764SElvis Wang     if (WidenMemoryRecipe == nullptr)
1629b3edc764SElvis Wang       return TTI::CastContextHint::None;
1630b3edc764SElvis Wang     if (!WidenMemoryRecipe->isConsecutive())
1631b3edc764SElvis Wang       return TTI::CastContextHint::GatherScatter;
1632b3edc764SElvis Wang     if (WidenMemoryRecipe->isReverse())
1633b3edc764SElvis Wang       return TTI::CastContextHint::Reversed;
1634b3edc764SElvis Wang     if (WidenMemoryRecipe->isMasked())
1635b3edc764SElvis Wang       return TTI::CastContextHint::Masked;
1636b3edc764SElvis Wang     return TTI::CastContextHint::Normal;
1637b3edc764SElvis Wang   };
1638b3edc764SElvis Wang 
1639b3edc764SElvis Wang   VPValue *Operand = getOperand(0);
1640b3edc764SElvis Wang   TTI::CastContextHint CCH = TTI::CastContextHint::None;
1641b3edc764SElvis Wang   // For Trunc/FPTrunc, get the context from the only user.
1642b3edc764SElvis Wang   if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
1643b3edc764SElvis Wang       !hasMoreThanOneUniqueUser() && getNumUsers() > 0) {
1644b3edc764SElvis Wang     if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin()))
1645b3edc764SElvis Wang       CCH = ComputeCCH(StoreRecipe);
1646b3edc764SElvis Wang   }
1647b3edc764SElvis Wang   // For Z/Sext, get the context from the operand.
1648b3edc764SElvis Wang   else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
1649b3edc764SElvis Wang            Opcode == Instruction::FPExt) {
1650b3edc764SElvis Wang     if (Operand->isLiveIn())
1651b3edc764SElvis Wang       CCH = TTI::CastContextHint::Normal;
1652b3edc764SElvis Wang     else if (Operand->getDefiningRecipe())
1653b3edc764SElvis Wang       CCH = ComputeCCH(Operand->getDefiningRecipe());
1654b3edc764SElvis Wang   }
1655b3edc764SElvis Wang 
1656b3edc764SElvis Wang   auto *SrcTy =
16579ab5474eSBenjamin Maxwell       cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(Operand), VF));
16589ab5474eSBenjamin Maxwell   auto *DestTy = cast<VectorType>(toVectorTy(getResultType(), VF));
1659b3edc764SElvis Wang   // Arm TTI will use the underlying instruction to determine the cost.
1660b3edc764SElvis Wang   return Ctx.TTI.getCastInstrCost(
1661edf3a55bSJohn Brawn       Opcode, DestTy, SrcTy, CCH, Ctx.CostKind,
1662b3edc764SElvis Wang       dyn_cast_if_present<Instruction>(getUnderlyingValue()));
1663b3edc764SElvis Wang }
1664b3edc764SElvis Wang 
1665e3afe0b8SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1666e3afe0b8SFlorian Hahn void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
1667e3afe0b8SFlorian Hahn                               VPSlotTracker &SlotTracker) const {
1668e3afe0b8SFlorian Hahn   O << Indent << "WIDEN-CAST ";
1669e3afe0b8SFlorian Hahn   printAsOperand(O, SlotTracker);
16702d038caeSFlorian Hahn   O << " = " << Instruction::getOpcodeName(Opcode);
1671633fe601SFlorian Hahn   printFlags(O);
1672e3afe0b8SFlorian Hahn   printOperands(O, SlotTracker);
1673e3afe0b8SFlorian Hahn   O << " to " << *getResultType();
1674e3afe0b8SFlorian Hahn }
167556f5738dSFlorian Hahn #endif
167603975b7fSFlorian Hahn 
1677680901edSFlorian Hahn InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
1678680901edSFlorian Hahn                                                VPCostContext &Ctx) const {
1679edf3a55bSJohn Brawn   return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
1680680901edSFlorian Hahn }
1681680901edSFlorian Hahn 
168256f5738dSFlorian Hahn /// This function adds
16834746395bSLuke Lau /// (0 * Step, 1 * Step, 2 * Step, ...)
16844746395bSLuke Lau /// to each vector element of Val.
168556f5738dSFlorian Hahn /// \p Opcode is relevant for FP induction variable.
16864746395bSLuke Lau static Value *getStepVector(Value *Val, Value *Step,
168756f5738dSFlorian Hahn                             Instruction::BinaryOps BinOp, ElementCount VF,
168856f5738dSFlorian Hahn                             IRBuilderBase &Builder) {
168956f5738dSFlorian Hahn   assert(VF.isVector() && "only vector VFs are supported");
169056f5738dSFlorian Hahn 
169156f5738dSFlorian Hahn   // Create and check the types.
169256f5738dSFlorian Hahn   auto *ValVTy = cast<VectorType>(Val->getType());
169356f5738dSFlorian Hahn   ElementCount VLen = ValVTy->getElementCount();
169456f5738dSFlorian Hahn 
169556f5738dSFlorian Hahn   Type *STy = Val->getType()->getScalarType();
169656f5738dSFlorian Hahn   assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
169756f5738dSFlorian Hahn          "Induction Step must be an integer or FP");
169856f5738dSFlorian Hahn   assert(Step->getType() == STy && "Step has wrong type");
169956f5738dSFlorian Hahn 
170056f5738dSFlorian Hahn   SmallVector<Constant *, 8> Indices;
170156f5738dSFlorian Hahn 
170256f5738dSFlorian Hahn   // Create a vector of consecutive numbers from zero to VF.
170356f5738dSFlorian Hahn   VectorType *InitVecValVTy = ValVTy;
170456f5738dSFlorian Hahn   if (STy->isFloatingPointTy()) {
170556f5738dSFlorian Hahn     Type *InitVecValSTy =
170656f5738dSFlorian Hahn         IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
170756f5738dSFlorian Hahn     InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
170856f5738dSFlorian Hahn   }
170956f5738dSFlorian Hahn   Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
171056f5738dSFlorian Hahn 
171156f5738dSFlorian Hahn   if (STy->isIntegerTy()) {
171256f5738dSFlorian Hahn     Step = Builder.CreateVectorSplat(VLen, Step);
171356f5738dSFlorian Hahn     assert(Step->getType() == Val->getType() && "Invalid step vec");
171456f5738dSFlorian Hahn     // FIXME: The newly created binary instructions should contain nsw/nuw
171556f5738dSFlorian Hahn     // flags, which can be found from the original scalar operations.
171656f5738dSFlorian Hahn     Step = Builder.CreateMul(InitVec, Step);
171756f5738dSFlorian Hahn     return Builder.CreateAdd(Val, Step, "induction");
171856f5738dSFlorian Hahn   }
171956f5738dSFlorian Hahn 
172056f5738dSFlorian Hahn   // Floating point induction.
172156f5738dSFlorian Hahn   assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
172256f5738dSFlorian Hahn          "Binary Opcode should be specified for FP induction");
172356f5738dSFlorian Hahn   InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
172456f5738dSFlorian Hahn 
172556f5738dSFlorian Hahn   Step = Builder.CreateVectorSplat(VLen, Step);
172656f5738dSFlorian Hahn   Value *MulOp = Builder.CreateFMul(InitVec, Step);
172756f5738dSFlorian Hahn   return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
172856f5738dSFlorian Hahn }
172956f5738dSFlorian Hahn 
173056f5738dSFlorian Hahn /// A helper function that returns an integer or floating-point constant with
173156f5738dSFlorian Hahn /// value C.
173256f5738dSFlorian Hahn static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
173356f5738dSFlorian Hahn   return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
173456f5738dSFlorian Hahn                            : ConstantFP::get(Ty, C);
173556f5738dSFlorian Hahn }
173656f5738dSFlorian Hahn 
173756f5738dSFlorian Hahn void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
1738aae7ac66SFlorian Hahn   assert(!State.Lane && "Int or FP induction being replicated.");
173956f5738dSFlorian Hahn 
174056f5738dSFlorian Hahn   Value *Start = getStartValue()->getLiveInIRValue();
174156f5738dSFlorian Hahn   const InductionDescriptor &ID = getInductionDescriptor();
174256f5738dSFlorian Hahn   TruncInst *Trunc = getTruncInst();
174356f5738dSFlorian Hahn   IRBuilderBase &Builder = State.Builder;
174495e509a9SFlorian Hahn   assert(getPHINode()->getType() == ID.getStartValue()->getType() &&
174595e509a9SFlorian Hahn          "Types must match");
174656f5738dSFlorian Hahn   assert(State.VF.isVector() && "must have vector VF");
174756f5738dSFlorian Hahn 
174856f5738dSFlorian Hahn   // The value from the original loop to which we are mapping the new induction
174956f5738dSFlorian Hahn   // variable.
175095e509a9SFlorian Hahn   Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode();
175156f5738dSFlorian Hahn 
175256f5738dSFlorian Hahn   // Fast-math-flags propagate from the original induction instruction.
175356f5738dSFlorian Hahn   IRBuilder<>::FastMathFlagGuard FMFG(Builder);
175456f5738dSFlorian Hahn   if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
175556f5738dSFlorian Hahn     Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
175656f5738dSFlorian Hahn 
175756f5738dSFlorian Hahn   // Now do the actual transformations, and start with fetching the step value.
1758aae7ac66SFlorian Hahn   Value *Step = State.get(getStepValue(), VPLane(0));
175956f5738dSFlorian Hahn 
176056f5738dSFlorian Hahn   assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
176156f5738dSFlorian Hahn          "Expected either an induction phi-node or a truncate of it!");
176256f5738dSFlorian Hahn 
176356f5738dSFlorian Hahn   // Construct the initial value of the vector IV in the vector loop preheader
176456f5738dSFlorian Hahn   auto CurrIP = Builder.saveIP();
176556f5738dSFlorian Hahn   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
176656f5738dSFlorian Hahn   Builder.SetInsertPoint(VectorPH->getTerminator());
176756f5738dSFlorian Hahn   if (isa<TruncInst>(EntryVal)) {
176856f5738dSFlorian Hahn     assert(Start->getType()->isIntegerTy() &&
176956f5738dSFlorian Hahn            "Truncation requires an integer type");
177056f5738dSFlorian Hahn     auto *TruncType = cast<IntegerType>(EntryVal->getType());
177156f5738dSFlorian Hahn     Step = Builder.CreateTrunc(Step, TruncType);
177256f5738dSFlorian Hahn     Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
177356f5738dSFlorian Hahn   }
177456f5738dSFlorian Hahn 
177556f5738dSFlorian Hahn   Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
17764746395bSLuke Lau   Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
17774746395bSLuke Lau                                       State.VF, State.Builder);
177856f5738dSFlorian Hahn 
177956f5738dSFlorian Hahn   // We create vector phi nodes for both integer and floating-point induction
178056f5738dSFlorian Hahn   // variables. Here, we determine the kind of arithmetic we will perform.
178156f5738dSFlorian Hahn   Instruction::BinaryOps AddOp;
178256f5738dSFlorian Hahn   Instruction::BinaryOps MulOp;
178356f5738dSFlorian Hahn   if (Step->getType()->isIntegerTy()) {
178456f5738dSFlorian Hahn     AddOp = Instruction::Add;
178556f5738dSFlorian Hahn     MulOp = Instruction::Mul;
178656f5738dSFlorian Hahn   } else {
178756f5738dSFlorian Hahn     AddOp = ID.getInductionOpcode();
178856f5738dSFlorian Hahn     MulOp = Instruction::FMul;
178956f5738dSFlorian Hahn   }
179056f5738dSFlorian Hahn 
17918ec40675SFlorian Hahn   Value *SplatVF;
17928ec40675SFlorian Hahn   if (VPValue *SplatVFOperand = getSplatVFValue()) {
17938ec40675SFlorian Hahn     // The recipe has been unrolled. In that case, fetch the splat value for the
17948ec40675SFlorian Hahn     // induction increment.
179557f5d8f2SFlorian Hahn     SplatVF = State.get(SplatVFOperand);
17968ec40675SFlorian Hahn   } else {
179756f5738dSFlorian Hahn     // Multiply the vectorization factor by the step using integer or
179856f5738dSFlorian Hahn     // floating-point arithmetic as appropriate.
179956f5738dSFlorian Hahn     Type *StepType = Step->getType();
1800aae7ac66SFlorian Hahn     Value *RuntimeVF = State.get(getVFValue(), VPLane(0));
180156f5738dSFlorian Hahn     if (Step->getType()->isFloatingPointTy())
1802a794ee45SFlorian Hahn       RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType);
180356f5738dSFlorian Hahn     else
1804a794ee45SFlorian Hahn       RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType);
180556f5738dSFlorian Hahn     Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
180656f5738dSFlorian Hahn 
180756f5738dSFlorian Hahn     // Create a vector splat to use in the induction update.
1808d9c26957SLuke Lau     SplatVF = Builder.CreateVectorSplat(State.VF, Mul);
18098ec40675SFlorian Hahn   }
18108ec40675SFlorian Hahn 
181156f5738dSFlorian Hahn   Builder.restoreIP(CurrIP);
181256f5738dSFlorian Hahn 
181356f5738dSFlorian Hahn   // We may need to add the step a number of times, depending on the unroll
181456f5738dSFlorian Hahn   // factor. The last of those goes into the PHI.
18156942c64eSJeremy Morse   PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
18166942c64eSJeremy Morse   VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1817734a204fSFlorian Hahn   VecInd->setDebugLoc(getDebugLoc());
181857f5d8f2SFlorian Hahn   State.set(this, VecInd);
181956f5738dSFlorian Hahn 
1820040bb371SFlorian Hahn   Instruction *LastInduction = cast<Instruction>(
1821040bb371SFlorian Hahn       Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next"));
182256f5738dSFlorian Hahn   if (isa<TruncInst>(EntryVal))
182356f5738dSFlorian Hahn     State.addMetadata(LastInduction, EntryVal);
1824734a204fSFlorian Hahn   LastInduction->setDebugLoc(getDebugLoc());
182556f5738dSFlorian Hahn 
182656f5738dSFlorian Hahn   VecInd->addIncoming(SteppedStart, VectorPH);
182756f5738dSFlorian Hahn   // Add induction update using an incorrect block temporarily. The phi node
182856f5738dSFlorian Hahn   // will be fixed after VPlan execution. Note that at this point the latch
182956f5738dSFlorian Hahn   // block cannot be used, as it does not exist yet.
183056f5738dSFlorian Hahn   // TODO: Model increment value in VPlan, by turning the recipe into a
183156f5738dSFlorian Hahn   // multi-def and a subclass of VPHeaderPHIRecipe.
183256f5738dSFlorian Hahn   VecInd->addIncoming(LastInduction, VectorPH);
183356f5738dSFlorian Hahn }
183456f5738dSFlorian Hahn 
183556f5738dSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
183603975b7fSFlorian Hahn void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
183703975b7fSFlorian Hahn                                           VPSlotTracker &SlotTracker) const {
183843045051SFlorian Hahn   O << Indent;
183943045051SFlorian Hahn   printAsOperand(O, SlotTracker);
184043045051SFlorian Hahn   O << " = WIDEN-INDUCTION  ";
184143045051SFlorian Hahn   printOperands(O, SlotTracker);
184203975b7fSFlorian Hahn 
184343045051SFlorian Hahn   if (auto *TI = getTruncInst())
184443045051SFlorian Hahn     O << " (truncated to " << *TI->getType() << ")";
184503975b7fSFlorian Hahn }
184603975b7fSFlorian Hahn #endif
184703975b7fSFlorian Hahn 
184803975b7fSFlorian Hahn bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
18492db03152SFlorian Hahn   // The step may be defined by a recipe in the preheader (e.g. if it requires
18502db03152SFlorian Hahn   // SCEV expansion), but for the canonical induction the step is required to be
18512db03152SFlorian Hahn   // 1, which is represented as live-in.
18522db03152SFlorian Hahn   if (getStepValue()->getDefiningRecipe())
18532db03152SFlorian Hahn     return false;
18542db03152SFlorian Hahn   auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
185503975b7fSFlorian Hahn   auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1856a48ebb82SFlorian Hahn   auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
1857a48ebb82SFlorian Hahn   return StartC && StartC->isZero() && StepC && StepC->isOne() &&
1858a48ebb82SFlorian Hahn          getScalarType() == CanIV->getScalarType();
185903975b7fSFlorian Hahn }
186003975b7fSFlorian Hahn 
18610c5df7cdSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
18620c5df7cdSFlorian Hahn void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
18630c5df7cdSFlorian Hahn                               VPSlotTracker &SlotTracker) const {
18640c5df7cdSFlorian Hahn   O << Indent;
18650c5df7cdSFlorian Hahn   printAsOperand(O, SlotTracker);
18662f0d3269SShao-Ce SUN   O << " = DERIVED-IV ";
18670c5df7cdSFlorian Hahn   getStartValue()->printAsOperand(O, SlotTracker);
18680c5df7cdSFlorian Hahn   O << " + ";
1869413a66f3SAlexey Bataev   getOperand(1)->printAsOperand(O, SlotTracker);
18700c5df7cdSFlorian Hahn   O << " * ";
18710c5df7cdSFlorian Hahn   getStepValue()->printAsOperand(O, SlotTracker);
1872bf15f1e4SFlorian Hahn }
18730c5df7cdSFlorian Hahn #endif
187403975b7fSFlorian Hahn 
187556f5738dSFlorian Hahn void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
187656f5738dSFlorian Hahn   // Fast-math-flags propagate from the original induction instruction.
187756f5738dSFlorian Hahn   IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
18783e2d564cSFlorian Hahn   if (hasFastMathFlags())
18793e2d564cSFlorian Hahn     State.Builder.setFastMathFlags(getFastMathFlags());
188056f5738dSFlorian Hahn 
188156f5738dSFlorian Hahn   /// Compute scalar induction steps. \p ScalarIV is the scalar induction
188256f5738dSFlorian Hahn   /// variable on which to base the steps, \p Step is the size of the step.
188356f5738dSFlorian Hahn 
1884aae7ac66SFlorian Hahn   Value *BaseIV = State.get(getOperand(0), VPLane(0));
1885aae7ac66SFlorian Hahn   Value *Step = State.get(getStepValue(), VPLane(0));
188656f5738dSFlorian Hahn   IRBuilderBase &Builder = State.Builder;
188756f5738dSFlorian Hahn 
188856f5738dSFlorian Hahn   // Ensure step has the same type as that of scalar IV.
188956f5738dSFlorian Hahn   Type *BaseIVTy = BaseIV->getType()->getScalarType();
18900ab539fdSFlorian Hahn   assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
189156f5738dSFlorian Hahn 
189256f5738dSFlorian Hahn   // We build scalar steps for both integer and floating-point induction
189356f5738dSFlorian Hahn   // variables. Here, we determine the kind of arithmetic we will perform.
189456f5738dSFlorian Hahn   Instruction::BinaryOps AddOp;
189556f5738dSFlorian Hahn   Instruction::BinaryOps MulOp;
189656f5738dSFlorian Hahn   if (BaseIVTy->isIntegerTy()) {
189756f5738dSFlorian Hahn     AddOp = Instruction::Add;
189856f5738dSFlorian Hahn     MulOp = Instruction::Mul;
189956f5738dSFlorian Hahn   } else {
19003e2d564cSFlorian Hahn     AddOp = InductionOpcode;
190156f5738dSFlorian Hahn     MulOp = Instruction::FMul;
190256f5738dSFlorian Hahn   }
190356f5738dSFlorian Hahn 
190456f5738dSFlorian Hahn   // Determine the number of scalars we need to generate for each unroll
190556f5738dSFlorian Hahn   // iteration.
190656f5738dSFlorian Hahn   bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
190756f5738dSFlorian Hahn   // Compute the scalar steps and save the results in State.
190856f5738dSFlorian Hahn   Type *IntStepTy =
190956f5738dSFlorian Hahn       IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
191056f5738dSFlorian Hahn   Type *VecIVTy = nullptr;
191156f5738dSFlorian Hahn   Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
191256f5738dSFlorian Hahn   if (!FirstLaneOnly && State.VF.isScalable()) {
191356f5738dSFlorian Hahn     VecIVTy = VectorType::get(BaseIVTy, State.VF);
191456f5738dSFlorian Hahn     UnitStepVec =
191556f5738dSFlorian Hahn         Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
191656f5738dSFlorian Hahn     SplatStep = Builder.CreateVectorSplat(State.VF, Step);
191756f5738dSFlorian Hahn     SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
191856f5738dSFlorian Hahn   }
191956f5738dSFlorian Hahn 
192056f5738dSFlorian Hahn   unsigned StartLane = 0;
192156f5738dSFlorian Hahn   unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1922aae7ac66SFlorian Hahn   if (State.Lane) {
1923aae7ac66SFlorian Hahn     StartLane = State.Lane->getKnownLane();
192456f5738dSFlorian Hahn     EndLane = StartLane + 1;
192556f5738dSFlorian Hahn   }
19268ec40675SFlorian Hahn   Value *StartIdx0 =
19278ec40675SFlorian Hahn       createStepForVF(Builder, IntStepTy, State.VF, getUnrollPart(*this));
192856f5738dSFlorian Hahn 
192956f5738dSFlorian Hahn   if (!FirstLaneOnly && State.VF.isScalable()) {
193056f5738dSFlorian Hahn     auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
193156f5738dSFlorian Hahn     auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
193256f5738dSFlorian Hahn     if (BaseIVTy->isFloatingPointTy())
193356f5738dSFlorian Hahn       InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
193456f5738dSFlorian Hahn     auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
193556f5738dSFlorian Hahn     auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
193657f5d8f2SFlorian Hahn     State.set(this, Add);
193756f5738dSFlorian Hahn     // It's useful to record the lane values too for the known minimum number
193856f5738dSFlorian Hahn     // of elements so we do those below. This improves the code quality when
193956f5738dSFlorian Hahn     // trying to extract the first element, for example.
194056f5738dSFlorian Hahn   }
194156f5738dSFlorian Hahn 
194256f5738dSFlorian Hahn   if (BaseIVTy->isFloatingPointTy())
194356f5738dSFlorian Hahn     StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
194456f5738dSFlorian Hahn 
194556f5738dSFlorian Hahn   for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
194656f5738dSFlorian Hahn     Value *StartIdx = Builder.CreateBinOp(
194756f5738dSFlorian Hahn         AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
194856f5738dSFlorian Hahn     // The step returned by `createStepForVF` is a runtime-evaluated value
194956f5738dSFlorian Hahn     // when VF is scalable. Otherwise, it should be folded into a Constant.
195056f5738dSFlorian Hahn     assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
195156f5738dSFlorian Hahn            "Expected StartIdx to be folded to a constant when VF is not "
195256f5738dSFlorian Hahn            "scalable");
195356f5738dSFlorian Hahn     auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
195456f5738dSFlorian Hahn     auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1955aae7ac66SFlorian Hahn     State.set(this, Add, VPLane(Lane));
195656f5738dSFlorian Hahn   }
195756f5738dSFlorian Hahn }
195856f5738dSFlorian Hahn 
195903975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
196003975b7fSFlorian Hahn void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
196103975b7fSFlorian Hahn                                   VPSlotTracker &SlotTracker) const {
196203975b7fSFlorian Hahn   O << Indent;
196303975b7fSFlorian Hahn   printAsOperand(O, SlotTracker);
19641d1cba44SFlorian Hahn   O << " = SCALAR-STEPS ";
196503975b7fSFlorian Hahn   printOperands(O, SlotTracker);
196603975b7fSFlorian Hahn }
19676a4bc452SFlorian Hahn #endif
196803975b7fSFlorian Hahn 
19696a4bc452SFlorian Hahn void VPWidenGEPRecipe::execute(VPTransformState &State) {
197001fa764cSFlorian Hahn   assert(State.VF.isVector() && "not widening");
19716a4bc452SFlorian Hahn   auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
19726a4bc452SFlorian Hahn   // Construct a vector GEP by widening the operands of the scalar GEP as
19736a4bc452SFlorian Hahn   // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
19746a4bc452SFlorian Hahn   // results in a vector of pointers when at least one operand of the GEP
19756a4bc452SFlorian Hahn   // is vector-typed. Thus, to keep the representation compact, we only use
19766a4bc452SFlorian Hahn   // vector-typed operands for loop-varying values.
19776a4bc452SFlorian Hahn 
197801fa764cSFlorian Hahn   if (areAllOperandsInvariant()) {
19796a4bc452SFlorian Hahn     // If we are vectorizing, but the GEP has only loop-invariant operands,
19806a4bc452SFlorian Hahn     // the GEP we build (by only using vector-typed operands for
19816a4bc452SFlorian Hahn     // loop-varying values) would be a scalar pointer. Thus, to ensure we
19826a4bc452SFlorian Hahn     // produce a vector of pointers, we need to either arbitrarily pick an
19836a4bc452SFlorian Hahn     // operand to broadcast, or broadcast a clone of the original GEP.
19846a4bc452SFlorian Hahn     // Here, we broadcast a clone of the original.
19856a4bc452SFlorian Hahn     //
19866a4bc452SFlorian Hahn     // TODO: If at some point we decide to scalarize instructions having
19876a4bc452SFlorian Hahn     //       loop-invariant operands, this special case will no longer be
19886a4bc452SFlorian Hahn     //       required. We would add the scalarization decision to
19896a4bc452SFlorian Hahn     //       collectLoopScalars() and teach getVectorValue() to broadcast
19906a4bc452SFlorian Hahn     //       the lane-zero scalar value.
19910a246a0cSFlorian Hahn     SmallVector<Value *> Ops;
19920a246a0cSFlorian Hahn     for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1993aae7ac66SFlorian Hahn       Ops.push_back(State.get(getOperand(I), VPLane(0)));
19940a246a0cSFlorian Hahn 
199511571874SNikita Popov     auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
199611571874SNikita Popov                                            ArrayRef(Ops).drop_front(), "",
199711571874SNikita Popov                                            getGEPNoWrapFlags());
199806c3a7d2SFlorian Hahn     Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
199957f5d8f2SFlorian Hahn     State.set(this, Splat);
200006c3a7d2SFlorian Hahn     State.addMetadata(Splat, GEP);
20016a4bc452SFlorian Hahn   } else {
20026a4bc452SFlorian Hahn     // If the GEP has at least one loop-varying operand, we are sure to
200306c3a7d2SFlorian Hahn     // produce a vector of pointers unless VF is scalar.
20046a4bc452SFlorian Hahn     // The pointer operand of the new GEP. If it's loop-invariant, we
20056a4bc452SFlorian Hahn     // won't broadcast it.
2006aae7ac66SFlorian Hahn     auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0))
200757f5d8f2SFlorian Hahn                                          : State.get(getOperand(0));
20086a4bc452SFlorian Hahn 
20096a4bc452SFlorian Hahn     // Collect all the indices for the new GEP. If any index is
20106a4bc452SFlorian Hahn     // loop-invariant, we won't broadcast it.
20116a4bc452SFlorian Hahn     SmallVector<Value *, 4> Indices;
20126a4bc452SFlorian Hahn     for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
20136a4bc452SFlorian Hahn       VPValue *Operand = getOperand(I);
2014a8adb38aSFlorian Hahn       if (isIndexLoopInvariant(I - 1))
2015aae7ac66SFlorian Hahn         Indices.push_back(State.get(Operand, VPLane(0)));
20166a4bc452SFlorian Hahn       else
201757f5d8f2SFlorian Hahn         Indices.push_back(State.get(Operand));
20186a4bc452SFlorian Hahn     }
20196a4bc452SFlorian Hahn 
20206a4bc452SFlorian Hahn     // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
20216a4bc452SFlorian Hahn     // but it should be a vector, otherwise.
20226a4bc452SFlorian Hahn     auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
202311571874SNikita Popov                                            Indices, "", getGEPNoWrapFlags());
20246a4bc452SFlorian Hahn     assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
20256a4bc452SFlorian Hahn            "NewGEP is not a pointer vector");
202657f5d8f2SFlorian Hahn     State.set(this, NewGEP);
20276a4bc452SFlorian Hahn     State.addMetadata(NewGEP, GEP);
20286a4bc452SFlorian Hahn   }
20296a4bc452SFlorian Hahn }
20306a4bc452SFlorian Hahn 
20316a4bc452SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
203203975b7fSFlorian Hahn void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
203303975b7fSFlorian Hahn                              VPSlotTracker &SlotTracker) const {
203403975b7fSFlorian Hahn   O << Indent << "WIDEN-GEP ";
2035a8adb38aSFlorian Hahn   O << (isPointerLoopInvariant() ? "Inv" : "Var");
2036a8adb38aSFlorian Hahn   for (size_t I = 0; I < getNumOperands() - 1; ++I)
2037a8adb38aSFlorian Hahn     O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
203803975b7fSFlorian Hahn 
203903975b7fSFlorian Hahn   O << " ";
204003975b7fSFlorian Hahn   printAsOperand(O, SlotTracker);
204103975b7fSFlorian Hahn   O << " = getelementptr";
2042299f0ff6SFlorian Hahn   printFlags(O);
204303975b7fSFlorian Hahn   printOperands(O, SlotTracker);
204403975b7fSFlorian Hahn }
20455d135041SFlorian Hahn #endif
204603975b7fSFlorian Hahn 
2047266ff98cSShih-Po Hung static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
2048266ff98cSShih-Po Hung                            unsigned CurrentPart, IRBuilderBase &Builder) {
2049f18536d6SFlorian Hahn   // Use i32 for the gep index type when the value is constant,
2050f18536d6SFlorian Hahn   // or query DataLayout for a more suitable index type otherwise.
205106c3a7d2SFlorian Hahn   const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
2052266ff98cSShih-Po Hung   return IsScalable && (IsReverse || CurrentPart > 0)
2053e177dd6fSYoungsuk Kim              ? DL.getIndexType(Builder.getPtrTy(0))
2054f18536d6SFlorian Hahn              : Builder.getInt32Ty();
2055266ff98cSShih-Po Hung }
205606c3a7d2SFlorian Hahn 
2057266ff98cSShih-Po Hung void VPReverseVectorPointerRecipe::execute(VPTransformState &State) {
2058266ff98cSShih-Po Hung   auto &Builder = State.Builder;
2059266ff98cSShih-Po Hung   State.setDebugLocFrom(getDebugLoc());
2060266ff98cSShih-Po Hung   unsigned CurrentPart = getUnrollPart(*this);
2061266ff98cSShih-Po Hung   Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
2062266ff98cSShih-Po Hung                                 CurrentPart, Builder);
2063266ff98cSShih-Po Hung 
2064266ff98cSShih-Po Hung   // The wide store needs to start at the last vector element.
2065266ff98cSShih-Po Hung   Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
2066266ff98cSShih-Po Hung   if (IndexTy != RunTimeVF->getType())
2067266ff98cSShih-Po Hung     RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
20688ec40675SFlorian Hahn   // NumElt = -CurrentPart * RunTimeVF
2069f18536d6SFlorian Hahn   Value *NumElt = Builder.CreateMul(
20708ec40675SFlorian Hahn       ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
2071f18536d6SFlorian Hahn   // LastLane = 1 - RunTimeVF
2072266ff98cSShih-Po Hung   Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
2073266ff98cSShih-Po Hung   Value *Ptr = State.get(getOperand(0), VPLane(0));
207411571874SNikita Popov   Value *ResultPtr =
207511571874SNikita Popov       Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
207611571874SNikita Popov   ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
207711571874SNikita Popov                                 getGEPNoWrapFlags());
2078266ff98cSShih-Po Hung 
2079266ff98cSShih-Po Hung   State.set(this, ResultPtr, /*IsScalar*/ true);
2080f18536d6SFlorian Hahn }
2081f18536d6SFlorian Hahn 
2082266ff98cSShih-Po Hung #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2083266ff98cSShih-Po Hung void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
2084266ff98cSShih-Po Hung                                          VPSlotTracker &SlotTracker) const {
2085266ff98cSShih-Po Hung   O << Indent;
2086266ff98cSShih-Po Hung   printAsOperand(O, SlotTracker);
2087266ff98cSShih-Po Hung   O << " = reverse-vector-pointer";
208811571874SNikita Popov   printFlags(O);
2089266ff98cSShih-Po Hung   printOperands(O, SlotTracker);
2090266ff98cSShih-Po Hung }
2091266ff98cSShih-Po Hung #endif
2092266ff98cSShih-Po Hung 
2093266ff98cSShih-Po Hung void VPVectorPointerRecipe::execute(VPTransformState &State) {
2094266ff98cSShih-Po Hung   auto &Builder = State.Builder;
2095266ff98cSShih-Po Hung   State.setDebugLocFrom(getDebugLoc());
2096266ff98cSShih-Po Hung   unsigned CurrentPart = getUnrollPart(*this);
2097266ff98cSShih-Po Hung   Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
2098266ff98cSShih-Po Hung                                 CurrentPart, Builder);
2099266ff98cSShih-Po Hung   Value *Ptr = State.get(getOperand(0), VPLane(0));
2100266ff98cSShih-Po Hung 
2101266ff98cSShih-Po Hung   Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
210211571874SNikita Popov   Value *ResultPtr =
210311571874SNikita Popov       Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags());
2104266ff98cSShih-Po Hung 
210557f5d8f2SFlorian Hahn   State.set(this, ResultPtr, /*IsScalar*/ true);
2106f18536d6SFlorian Hahn }
2107f18536d6SFlorian Hahn 
2108f18536d6SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2109f18536d6SFlorian Hahn void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
2110f18536d6SFlorian Hahn                                   VPSlotTracker &SlotTracker) const {
2111f18536d6SFlorian Hahn   O << Indent;
2112f18536d6SFlorian Hahn   printAsOperand(O, SlotTracker);
2113f18536d6SFlorian Hahn   O << " = vector-pointer ";
2114f18536d6SFlorian Hahn 
2115f18536d6SFlorian Hahn   printOperands(O, SlotTracker);
2116f18536d6SFlorian Hahn }
2117f18536d6SFlorian Hahn #endif
2118f18536d6SFlorian Hahn 
21195d135041SFlorian Hahn void VPBlendRecipe::execute(VPTransformState &State) {
21204f075086SPaul Walker   assert(isNormalized() && "Expected blend to be normalized!");
2121165e24aaSFlorian Hahn   State.setDebugLocFrom(getDebugLoc());
21225d135041SFlorian Hahn   // We know that all PHIs in non-header blocks are converted into
21235d135041SFlorian Hahn   // selects, so we don't have to worry about the insertion order and we
21245d135041SFlorian Hahn   // can just use the builder.
21255d135041SFlorian Hahn   // At this point we generate the predication tree. There may be
21265d135041SFlorian Hahn   // duplications since this is a simple recursive scan, but future
21275d135041SFlorian Hahn   // optimizations will clean it up.
21285d135041SFlorian Hahn 
21295d135041SFlorian Hahn   unsigned NumIncoming = getNumIncomingValues();
21305d135041SFlorian Hahn 
21315d135041SFlorian Hahn   // Generate a sequence of selects of the form:
21325d135041SFlorian Hahn   // SELECT(Mask3, In3,
21335d135041SFlorian Hahn   //        SELECT(Mask2, In2,
21345d135041SFlorian Hahn   //               SELECT(Mask1, In1,
21355d135041SFlorian Hahn   //                      In0)))
21365d135041SFlorian Hahn   // Note that Mask0 is never used: lanes for which no path reaches this phi and
21375d135041SFlorian Hahn   // are essentially undef are taken from In0.
2138d187005cSFlorian Hahn   bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
213906c3a7d2SFlorian Hahn   Value *Result = nullptr;
21405d135041SFlorian Hahn   for (unsigned In = 0; In < NumIncoming; ++In) {
21415d135041SFlorian Hahn     // We might have single edge PHIs (blocks) - use an identity
21425d135041SFlorian Hahn     // 'select' for the first PHI operand.
214357f5d8f2SFlorian Hahn     Value *In0 = State.get(getIncomingValue(In), OnlyFirstLaneUsed);
21445d135041SFlorian Hahn     if (In == 0)
214506c3a7d2SFlorian Hahn       Result = In0; // Initialize with the first incoming value.
21465d135041SFlorian Hahn     else {
21475d135041SFlorian Hahn       // Select between the current value and the previous incoming edge
21485d135041SFlorian Hahn       // based on the incoming mask.
214957f5d8f2SFlorian Hahn       Value *Cond = State.get(getMask(In), OnlyFirstLaneUsed);
215006c3a7d2SFlorian Hahn       Result = State.Builder.CreateSelect(Cond, In0, Result, "predphi");
21515d135041SFlorian Hahn     }
21525d135041SFlorian Hahn   }
215357f5d8f2SFlorian Hahn   State.set(this, Result, OnlyFirstLaneUsed);
21545d135041SFlorian Hahn }
21555d135041SFlorian Hahn 
215636fc291bSFlorian Hahn InstructionCost VPBlendRecipe::computeCost(ElementCount VF,
215736fc291bSFlorian Hahn                                            VPCostContext &Ctx) const {
215836fc291bSFlorian Hahn   // Handle cases where only the first lane is used the same way as the legacy
215936fc291bSFlorian Hahn   // cost model.
216036fc291bSFlorian Hahn   if (vputils::onlyFirstLaneUsed(this))
2161edf3a55bSJohn Brawn     return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
216236fc291bSFlorian Hahn 
21639ab5474eSBenjamin Maxwell   Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
21649ab5474eSBenjamin Maxwell   Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
216536fc291bSFlorian Hahn   return (getNumIncomingValues() - 1) *
216636fc291bSFlorian Hahn          Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
2167edf3a55bSJohn Brawn                                     CmpInst::BAD_ICMP_PREDICATE, Ctx.CostKind);
216836fc291bSFlorian Hahn }
216936fc291bSFlorian Hahn 
21705d135041SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
217103975b7fSFlorian Hahn void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
217203975b7fSFlorian Hahn                           VPSlotTracker &SlotTracker) const {
217303975b7fSFlorian Hahn   O << Indent << "BLEND ";
21743fa1b254SFlorian Hahn   printAsOperand(O, SlotTracker);
217503975b7fSFlorian Hahn   O << " =";
217603975b7fSFlorian Hahn   if (getNumIncomingValues() == 1) {
217703975b7fSFlorian Hahn     // Not a User of any mask: not really blending, this is a
217803975b7fSFlorian Hahn     // single-predecessor phi.
217903975b7fSFlorian Hahn     O << " ";
218003975b7fSFlorian Hahn     getIncomingValue(0)->printAsOperand(O, SlotTracker);
218103975b7fSFlorian Hahn   } else {
218203975b7fSFlorian Hahn     for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
218303975b7fSFlorian Hahn       O << " ";
218403975b7fSFlorian Hahn       getIncomingValue(I)->printAsOperand(O, SlotTracker);
2185c8369836SFlorian Hahn       if (I == 0)
2186c8369836SFlorian Hahn         continue;
218703975b7fSFlorian Hahn       O << "/";
218803975b7fSFlorian Hahn       getMask(I)->printAsOperand(O, SlotTracker);
218903975b7fSFlorian Hahn     }
219003975b7fSFlorian Hahn   }
219103975b7fSFlorian Hahn }
219215d11a4dSFlorian Hahn #endif
219303975b7fSFlorian Hahn 
219415d11a4dSFlorian Hahn void VPReductionRecipe::execute(VPTransformState &State) {
2195aae7ac66SFlorian Hahn   assert(!State.Lane && "Reduction being replicated.");
219657f5d8f2SFlorian Hahn   Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
219715d11a4dSFlorian Hahn   RecurKind Kind = RdxDesc.getRecurrenceKind();
219815d11a4dSFlorian Hahn   // Propagate the fast-math flags carried by the underlying instruction.
219915d11a4dSFlorian Hahn   IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
220015d11a4dSFlorian Hahn   State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
220147e1c87aSElvis Wang   State.setDebugLocFrom(getDebugLoc());
220257f5d8f2SFlorian Hahn   Value *NewVecOp = State.get(getVecOp());
220315d11a4dSFlorian Hahn   if (VPValue *Cond = getCondOp()) {
220457f5d8f2SFlorian Hahn     Value *NewCond = State.get(Cond, State.VF.isScalar());
220515d11a4dSFlorian Hahn     VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
220615d11a4dSFlorian Hahn     Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
220715d11a4dSFlorian Hahn 
22080b2f2537SPhilip Reames     Value *Start;
22090b2f2537SPhilip Reames     if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind))
22100b2f2537SPhilip Reames       Start = RdxDesc.getRecurrenceStartValue();
22110b2f2537SPhilip Reames     else
22123d9abfc9SPhilip Reames       Start = llvm::getRecurrenceIdentity(Kind, ElementTy,
22130b2f2537SPhilip Reames                                           RdxDesc.getFastMathFlags());
22140b2f2537SPhilip Reames     if (State.VF.isVector())
221506c3a7d2SFlorian Hahn       Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);
22160b2f2537SPhilip Reames 
22170b2f2537SPhilip Reames     Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
221815d11a4dSFlorian Hahn     NewVecOp = Select;
221915d11a4dSFlorian Hahn   }
222015d11a4dSFlorian Hahn   Value *NewRed;
222115d11a4dSFlorian Hahn   Value *NextInChain;
222215d11a4dSFlorian Hahn   if (IsOrdered) {
222315d11a4dSFlorian Hahn     if (State.VF.isVector())
222406c3a7d2SFlorian Hahn       NewRed =
222506c3a7d2SFlorian Hahn           createOrderedReduction(State.Builder, RdxDesc, NewVecOp, PrevInChain);
222615d11a4dSFlorian Hahn     else
222715d11a4dSFlorian Hahn       NewRed = State.Builder.CreateBinOp(
22282a0ee090SRamkumar Ramachandra           (Instruction::BinaryOps)RdxDesc.getOpcode(), PrevInChain, NewVecOp);
222915d11a4dSFlorian Hahn     PrevInChain = NewRed;
2230c53008deSPhilip Reames     NextInChain = NewRed;
223115d11a4dSFlorian Hahn   } else {
223257f5d8f2SFlorian Hahn     PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
22333e8840baSPhilip Reames     NewRed = createReduction(State.Builder, RdxDesc, NewVecOp);
2234c53008deSPhilip Reames     if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
223515d11a4dSFlorian Hahn       NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
223615d11a4dSFlorian Hahn                                    NewRed, PrevInChain);
223715d11a4dSFlorian Hahn     else
223815d11a4dSFlorian Hahn       NextInChain = State.Builder.CreateBinOp(
22392a0ee090SRamkumar Ramachandra           (Instruction::BinaryOps)RdxDesc.getOpcode(), NewRed, PrevInChain);
2240c53008deSPhilip Reames   }
224157f5d8f2SFlorian Hahn   State.set(this, NextInChain, /*IsScalar*/ true);
224215d11a4dSFlorian Hahn }
224315d11a4dSFlorian Hahn 
22444eb30cfbSMel Chen void VPReductionEVLRecipe::execute(VPTransformState &State) {
2245aae7ac66SFlorian Hahn   assert(!State.Lane && "Reduction being replicated.");
22464eb30cfbSMel Chen 
22474eb30cfbSMel Chen   auto &Builder = State.Builder;
22484eb30cfbSMel Chen   // Propagate the fast-math flags carried by the underlying instruction.
22494eb30cfbSMel Chen   IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
22504eb30cfbSMel Chen   const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor();
22514eb30cfbSMel Chen   Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
22524eb30cfbSMel Chen 
22534eb30cfbSMel Chen   RecurKind Kind = RdxDesc.getRecurrenceKind();
225457f5d8f2SFlorian Hahn   Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
225557f5d8f2SFlorian Hahn   Value *VecOp = State.get(getVecOp());
2256aae7ac66SFlorian Hahn   Value *EVL = State.get(getEVL(), VPLane(0));
22574eb30cfbSMel Chen 
22584eb30cfbSMel Chen   VectorBuilder VBuilder(Builder);
22594eb30cfbSMel Chen   VBuilder.setEVL(EVL);
22604eb30cfbSMel Chen   Value *Mask;
22614eb30cfbSMel Chen   // TODO: move the all-true mask generation into VectorBuilder.
22624eb30cfbSMel Chen   if (VPValue *CondOp = getCondOp())
226357f5d8f2SFlorian Hahn     Mask = State.get(CondOp);
22644eb30cfbSMel Chen   else
22654eb30cfbSMel Chen     Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
22664eb30cfbSMel Chen   VBuilder.setMask(Mask);
22674eb30cfbSMel Chen 
22684eb30cfbSMel Chen   Value *NewRed;
22694eb30cfbSMel Chen   if (isOrdered()) {
22704eb30cfbSMel Chen     NewRed = createOrderedReduction(VBuilder, RdxDesc, VecOp, Prev);
22714eb30cfbSMel Chen   } else {
22723e8840baSPhilip Reames     NewRed = createSimpleReduction(VBuilder, VecOp, RdxDesc);
22734eb30cfbSMel Chen     if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
22744eb30cfbSMel Chen       NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);
22754eb30cfbSMel Chen     else
22762a0ee090SRamkumar Ramachandra       NewRed = Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(),
22772a0ee090SRamkumar Ramachandra                                    NewRed, Prev);
22784eb30cfbSMel Chen   }
227957f5d8f2SFlorian Hahn   State.set(this, NewRed, /*IsScalar*/ true);
22804eb30cfbSMel Chen }
22814eb30cfbSMel Chen 
22823c91a2f7SElvis Wang InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
22833c91a2f7SElvis Wang                                                VPCostContext &Ctx) const {
22843c91a2f7SElvis Wang   RecurKind RdxKind = RdxDesc.getRecurrenceKind();
22853c91a2f7SElvis Wang   Type *ElementTy = Ctx.Types.inferScalarType(this);
22869ab5474eSBenjamin Maxwell   auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
22873c91a2f7SElvis Wang   unsigned Opcode = RdxDesc.getOpcode();
22883c91a2f7SElvis Wang 
22893c91a2f7SElvis Wang   // TODO: Support any-of and in-loop reductions.
22903c91a2f7SElvis Wang   assert(
22913c91a2f7SElvis Wang       (!RecurrenceDescriptor::isAnyOfRecurrenceKind(RdxKind) ||
22923c91a2f7SElvis Wang        ForceTargetInstructionCost.getNumOccurrences() > 0) &&
22933c91a2f7SElvis Wang       "Any-of reduction not implemented in VPlan-based cost model currently.");
22943c91a2f7SElvis Wang   assert(
22953c91a2f7SElvis Wang       (!cast<VPReductionPHIRecipe>(getOperand(0))->isInLoop() ||
22963c91a2f7SElvis Wang        ForceTargetInstructionCost.getNumOccurrences() > 0) &&
22973c91a2f7SElvis Wang       "In-loop reduction not implemented in VPlan-based cost model currently.");
22983c91a2f7SElvis Wang 
22993c91a2f7SElvis Wang   assert(ElementTy->getTypeID() == RdxDesc.getRecurrenceType()->getTypeID() &&
23003c91a2f7SElvis Wang          "Inferred type and recurrence type mismatch.");
23013c91a2f7SElvis Wang 
23023c91a2f7SElvis Wang   // Cost = Reduction cost + BinOp cost
23033c91a2f7SElvis Wang   InstructionCost Cost =
2304edf3a55bSJohn Brawn       Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, Ctx.CostKind);
23053c91a2f7SElvis Wang   if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind)) {
23063c91a2f7SElvis Wang     Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind);
23073c91a2f7SElvis Wang     return Cost + Ctx.TTI.getMinMaxReductionCost(
2308edf3a55bSJohn Brawn                       Id, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind);
23093c91a2f7SElvis Wang   }
23103c91a2f7SElvis Wang 
23113c91a2f7SElvis Wang   return Cost + Ctx.TTI.getArithmeticReductionCost(
2312edf3a55bSJohn Brawn                     Opcode, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind);
23133c91a2f7SElvis Wang }
23143c91a2f7SElvis Wang 
231515d11a4dSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
231603975b7fSFlorian Hahn void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
231703975b7fSFlorian Hahn                               VPSlotTracker &SlotTracker) const {
231803975b7fSFlorian Hahn   O << Indent << "REDUCE ";
231903975b7fSFlorian Hahn   printAsOperand(O, SlotTracker);
232003975b7fSFlorian Hahn   O << " = ";
232103975b7fSFlorian Hahn   getChainOp()->printAsOperand(O, SlotTracker);
232203975b7fSFlorian Hahn   O << " +";
232303975b7fSFlorian Hahn   if (isa<FPMathOperator>(getUnderlyingInstr()))
232403975b7fSFlorian Hahn     O << getUnderlyingInstr()->getFastMathFlags();
2325463e7cb8SMel Chen   O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
232603975b7fSFlorian Hahn   getVecOp()->printAsOperand(O, SlotTracker);
23274eb30cfbSMel Chen   if (isConditional()) {
23284eb30cfbSMel Chen     O << ", ";
23294eb30cfbSMel Chen     getCondOp()->printAsOperand(O, SlotTracker);
23304eb30cfbSMel Chen   }
23314eb30cfbSMel Chen   O << ")";
23324eb30cfbSMel Chen   if (RdxDesc.IntermediateStore)
23334eb30cfbSMel Chen     O << " (with final reduction value stored in invariant address sank "
23344eb30cfbSMel Chen          "outside of loop)";
23354eb30cfbSMel Chen }
23364eb30cfbSMel Chen 
23374eb30cfbSMel Chen void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent,
23384eb30cfbSMel Chen                                  VPSlotTracker &SlotTracker) const {
23394eb30cfbSMel Chen   const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor();
23404eb30cfbSMel Chen   O << Indent << "REDUCE ";
23414eb30cfbSMel Chen   printAsOperand(O, SlotTracker);
23424eb30cfbSMel Chen   O << " = ";
23434eb30cfbSMel Chen   getChainOp()->printAsOperand(O, SlotTracker);
23444eb30cfbSMel Chen   O << " +";
23454eb30cfbSMel Chen   if (isa<FPMathOperator>(getUnderlyingInstr()))
23464eb30cfbSMel Chen     O << getUnderlyingInstr()->getFastMathFlags();
23474eb30cfbSMel Chen   O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
23484eb30cfbSMel Chen   getVecOp()->printAsOperand(O, SlotTracker);
23494eb30cfbSMel Chen   O << ", ";
23504eb30cfbSMel Chen   getEVL()->printAsOperand(O, SlotTracker);
23514eb30cfbSMel Chen   if (isConditional()) {
235203975b7fSFlorian Hahn     O << ", ";
235303975b7fSFlorian Hahn     getCondOp()->printAsOperand(O, SlotTracker);
235403975b7fSFlorian Hahn   }
235503975b7fSFlorian Hahn   O << ")";
2356463e7cb8SMel Chen   if (RdxDesc.IntermediateStore)
235703975b7fSFlorian Hahn     O << " (with final reduction value stored in invariant address sank "
235803975b7fSFlorian Hahn          "outside of loop)";
235903975b7fSFlorian Hahn }
2360df016a95SFlorian Hahn #endif
236103975b7fSFlorian Hahn 
23629333b977SFlorian Hahn bool VPReplicateRecipe::shouldPack() const {
23639333b977SFlorian Hahn   // Find if the recipe is used by a widened recipe via an intervening
23649333b977SFlorian Hahn   // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
23659333b977SFlorian Hahn   return any_of(users(), [](const VPUser *U) {
23669333b977SFlorian Hahn     if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
2367c21ccebeSFlorian Hahn       return any_of(PredR->users(), [PredR](const VPUser *U) {
2368c21ccebeSFlorian Hahn         return !U->usesScalars(PredR);
2369c21ccebeSFlorian Hahn       });
23709333b977SFlorian Hahn     return false;
23719333b977SFlorian Hahn   });
23729333b977SFlorian Hahn }
2373df016a95SFlorian Hahn 
2374af6ebb70SFlorian Hahn InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
2375af6ebb70SFlorian Hahn                                                VPCostContext &Ctx) const {
2376af6ebb70SFlorian Hahn   Instruction *UI = cast<Instruction>(getUnderlyingValue());
2377af6ebb70SFlorian Hahn   // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
2378af6ebb70SFlorian Hahn   // transform, avoid computing their cost multiple times for now.
2379af6ebb70SFlorian Hahn   Ctx.SkipCostComputation.insert(UI);
2380af6ebb70SFlorian Hahn   return Ctx.getLegacyCost(UI, VF);
2381af6ebb70SFlorian Hahn }
2382af6ebb70SFlorian Hahn 
2383df016a95SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
238403975b7fSFlorian Hahn void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
238503975b7fSFlorian Hahn                               VPSlotTracker &SlotTracker) const {
238603975b7fSFlorian Hahn   O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
238703975b7fSFlorian Hahn 
238803975b7fSFlorian Hahn   if (!getUnderlyingInstr()->getType()->isVoidTy()) {
238903975b7fSFlorian Hahn     printAsOperand(O, SlotTracker);
239003975b7fSFlorian Hahn     O << " = ";
239103975b7fSFlorian Hahn   }
239203975b7fSFlorian Hahn   if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
2393299f0ff6SFlorian Hahn     O << "call";
2394299f0ff6SFlorian Hahn     printFlags(O);
2395299f0ff6SFlorian Hahn     O << "@" << CB->getCalledFunction()->getName() << "(";
239603975b7fSFlorian Hahn     interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
239703975b7fSFlorian Hahn                     O, [&O, &SlotTracker](VPValue *Op) {
239803975b7fSFlorian Hahn                       Op->printAsOperand(O, SlotTracker);
239903975b7fSFlorian Hahn                     });
240003975b7fSFlorian Hahn     O << ")";
240103975b7fSFlorian Hahn   } else {
2402299f0ff6SFlorian Hahn     O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode());
2403299f0ff6SFlorian Hahn     printFlags(O);
240403975b7fSFlorian Hahn     printOperands(O, SlotTracker);
240503975b7fSFlorian Hahn   }
240603975b7fSFlorian Hahn 
24079333b977SFlorian Hahn   if (shouldPack())
240803975b7fSFlorian Hahn     O << " (S->V)";
240903975b7fSFlorian Hahn }
2410225e3ec6SFlorian Hahn #endif
241103975b7fSFlorian Hahn 
241206c3a7d2SFlorian Hahn Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
2413f4230b43SFlorian Hahn   State.setDebugLocFrom(getDebugLoc());
24140ab539fdSFlorian Hahn   assert(vputils::onlyFirstLaneUsed(this) &&
24150ab539fdSFlorian Hahn          "Codegen only implemented for first lane.");
24160ab539fdSFlorian Hahn   switch (Opcode) {
24170ab539fdSFlorian Hahn   case Instruction::SExt:
24180ab539fdSFlorian Hahn   case Instruction::ZExt:
24190ab539fdSFlorian Hahn   case Instruction::Trunc: {
24200ab539fdSFlorian Hahn     // Note: SExt/ZExt not used yet.
2421aae7ac66SFlorian Hahn     Value *Op = State.get(getOperand(0), VPLane(0));
24220ab539fdSFlorian Hahn     return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
24230ab539fdSFlorian Hahn   }
24240ab539fdSFlorian Hahn   default:
24250ab539fdSFlorian Hahn     llvm_unreachable("opcode not implemented yet");
24260ab539fdSFlorian Hahn   }
24270ab539fdSFlorian Hahn }
24280ab539fdSFlorian Hahn 
24290ab539fdSFlorian Hahn void VPScalarCastRecipe ::execute(VPTransformState &State) {
2430aae7ac66SFlorian Hahn   State.set(this, generate(State), VPLane(0));
24310ab539fdSFlorian Hahn }
24320ab539fdSFlorian Hahn 
24330ab539fdSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
24340ab539fdSFlorian Hahn void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
24350ab539fdSFlorian Hahn                                 VPSlotTracker &SlotTracker) const {
24360ab539fdSFlorian Hahn   O << Indent << "SCALAR-CAST ";
24370ab539fdSFlorian Hahn   printAsOperand(O, SlotTracker);
24380ab539fdSFlorian Hahn   O << " = " << Instruction::getOpcodeName(Opcode) << " ";
24390ab539fdSFlorian Hahn   printOperands(O, SlotTracker);
24400ab539fdSFlorian Hahn   O << " to " << *ResultTy;
24410ab539fdSFlorian Hahn }
24420ab539fdSFlorian Hahn #endif
24430ab539fdSFlorian Hahn 
2444225e3ec6SFlorian Hahn void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
2445aae7ac66SFlorian Hahn   assert(State.Lane && "Branch on Mask works only on single instance.");
2446225e3ec6SFlorian Hahn 
2447225e3ec6SFlorian Hahn 
2448225e3ec6SFlorian Hahn   Value *ConditionBit = nullptr;
2449225e3ec6SFlorian Hahn   VPValue *BlockInMask = getMask();
2450*713482fcSFlorian Hahn   if (BlockInMask)
2451*713482fcSFlorian Hahn     ConditionBit = State.get(BlockInMask, *State.Lane);
2452*713482fcSFlorian Hahn   else // Block in mask is all-one.
2453225e3ec6SFlorian Hahn     ConditionBit = State.Builder.getTrue();
2454225e3ec6SFlorian Hahn 
2455225e3ec6SFlorian Hahn   // Replace the temporary unreachable terminator with a new conditional branch,
2456225e3ec6SFlorian Hahn   // whose two destinations will be set later when they are created.
2457225e3ec6SFlorian Hahn   auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
2458225e3ec6SFlorian Hahn   assert(isa<UnreachableInst>(CurrentTerminator) &&
2459225e3ec6SFlorian Hahn          "Expected to replace unreachable terminator with conditional branch.");
2460225e3ec6SFlorian Hahn   auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
2461225e3ec6SFlorian Hahn   CondBr->setSuccessor(0, nullptr);
2462225e3ec6SFlorian Hahn   ReplaceInstWithInst(CurrentTerminator, CondBr);
2463225e3ec6SFlorian Hahn }
2464225e3ec6SFlorian Hahn 
2465fa3258ecSFlorian Hahn InstructionCost VPBranchOnMaskRecipe::computeCost(ElementCount VF,
2466fa3258ecSFlorian Hahn                                                   VPCostContext &Ctx) const {
2467fa3258ecSFlorian Hahn   // The legacy cost model doesn't assign costs to branches for individual
2468fa3258ecSFlorian Hahn   // replicate regions. Match the current behavior in the VPlan cost model for
2469fa3258ecSFlorian Hahn   // now.
2470fa3258ecSFlorian Hahn   return 0;
2471fa3258ecSFlorian Hahn }
2472fa3258ecSFlorian Hahn 
2473cc0ee179SFlorian Hahn void VPPredInstPHIRecipe::execute(VPTransformState &State) {
24749b496debSFlorian Hahn   State.setDebugLocFrom(getDebugLoc());
2475aae7ac66SFlorian Hahn   assert(State.Lane && "Predicated instruction PHI works per instance.");
2476cc0ee179SFlorian Hahn   Instruction *ScalarPredInst =
2477aae7ac66SFlorian Hahn       cast<Instruction>(State.get(getOperand(0), *State.Lane));
2478cc0ee179SFlorian Hahn   BasicBlock *PredicatedBB = ScalarPredInst->getParent();
2479cc0ee179SFlorian Hahn   BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
2480cc0ee179SFlorian Hahn   assert(PredicatingBB && "Predicated block has no single predecessor.");
2481cc0ee179SFlorian Hahn   assert(isa<VPReplicateRecipe>(getOperand(0)) &&
2482cc0ee179SFlorian Hahn          "operand must be VPReplicateRecipe");
2483cc0ee179SFlorian Hahn 
2484cc0ee179SFlorian Hahn   // By current pack/unpack logic we need to generate only a single phi node: if
2485cc0ee179SFlorian Hahn   // a vector value for the predicated instruction exists at this point it means
2486cc0ee179SFlorian Hahn   // the instruction has vector users only, and a phi for the vector value is
2487cc0ee179SFlorian Hahn   // needed. In this case the recipe of the predicated instruction is marked to
2488cc0ee179SFlorian Hahn   // also do that packing, thereby "hoisting" the insert-element sequence.
2489cc0ee179SFlorian Hahn   // Otherwise, a phi node for the scalar value is needed.
249057f5d8f2SFlorian Hahn   if (State.hasVectorValue(getOperand(0))) {
249157f5d8f2SFlorian Hahn     Value *VectorValue = State.get(getOperand(0));
2492cc0ee179SFlorian Hahn     InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
2493cc0ee179SFlorian Hahn     PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
2494cc0ee179SFlorian Hahn     VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
2495cc0ee179SFlorian Hahn     VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
249657f5d8f2SFlorian Hahn     if (State.hasVectorValue(this))
249757f5d8f2SFlorian Hahn       State.reset(this, VPhi);
2498cc0ee179SFlorian Hahn     else
249957f5d8f2SFlorian Hahn       State.set(this, VPhi);
2500cc0ee179SFlorian Hahn     // NOTE: Currently we need to update the value of the operand, so the next
2501cc0ee179SFlorian Hahn     // predicated iteration inserts its generated value in the correct vector.
250257f5d8f2SFlorian Hahn     State.reset(getOperand(0), VPhi);
2503cc0ee179SFlorian Hahn   } else {
250468210c7cSFlorian Hahn     if (vputils::onlyFirstLaneUsed(this) && !State.Lane->isFirstLane())
250568210c7cSFlorian Hahn       return;
250668210c7cSFlorian Hahn 
2507cc0ee179SFlorian Hahn     Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
2508cc0ee179SFlorian Hahn     PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
2509cc0ee179SFlorian Hahn     Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
2510cc0ee179SFlorian Hahn                      PredicatingBB);
2511cc0ee179SFlorian Hahn     Phi->addIncoming(ScalarPredInst, PredicatedBB);
2512aae7ac66SFlorian Hahn     if (State.hasScalarValue(this, *State.Lane))
2513aae7ac66SFlorian Hahn       State.reset(this, Phi, *State.Lane);
2514cc0ee179SFlorian Hahn     else
2515aae7ac66SFlorian Hahn       State.set(this, Phi, *State.Lane);
2516cc0ee179SFlorian Hahn     // NOTE: Currently we need to update the value of the operand, so the next
2517cc0ee179SFlorian Hahn     // predicated iteration inserts its generated value in the correct vector.
2518aae7ac66SFlorian Hahn     State.reset(getOperand(0), Phi, *State.Lane);
2519cc0ee179SFlorian Hahn   }
2520cc0ee179SFlorian Hahn }
2521cc0ee179SFlorian Hahn 
2522225e3ec6SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
252303975b7fSFlorian Hahn void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
252403975b7fSFlorian Hahn                                 VPSlotTracker &SlotTracker) const {
252503975b7fSFlorian Hahn   O << Indent << "PHI-PREDICATED-INSTRUCTION ";
252603975b7fSFlorian Hahn   printAsOperand(O, SlotTracker);
252703975b7fSFlorian Hahn   O << " = ";
252803975b7fSFlorian Hahn   printOperands(O, SlotTracker);
252903975b7fSFlorian Hahn }
253035d3625aSFlorian Hahn #endif
253103975b7fSFlorian Hahn 
2532ed220e15SElvis Wang InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
2533ed220e15SElvis Wang                                                  VPCostContext &Ctx) const {
25349ab5474eSBenjamin Maxwell   Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
2535ed220e15SElvis Wang   const Align Alignment =
2536ed220e15SElvis Wang       getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
2537ed220e15SElvis Wang   unsigned AS =
2538ed220e15SElvis Wang       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
2539ed220e15SElvis Wang 
2540ed220e15SElvis Wang   if (!Consecutive) {
2541ed220e15SElvis Wang     // TODO: Using the original IR may not be accurate.
2542ed220e15SElvis Wang     // Currently, ARM will use the underlying IR to calculate gather/scatter
2543ed220e15SElvis Wang     // instruction cost.
2544ed220e15SElvis Wang     const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
2545ed220e15SElvis Wang     assert(!Reverse &&
2546ed220e15SElvis Wang            "Inconsecutive memory access should not have the order.");
2547ed220e15SElvis Wang     return Ctx.TTI.getAddressComputationCost(Ty) +
2548ed220e15SElvis Wang            Ctx.TTI.getGatherScatterOpCost(Ingredient.getOpcode(), Ty, Ptr,
2549edf3a55bSJohn Brawn                                           IsMasked, Alignment, Ctx.CostKind,
2550ed220e15SElvis Wang                                           &Ingredient);
2551ed220e15SElvis Wang   }
2552ed220e15SElvis Wang 
2553ed220e15SElvis Wang   InstructionCost Cost = 0;
2554ed220e15SElvis Wang   if (IsMasked) {
2555ed220e15SElvis Wang     Cost += Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
2556edf3a55bSJohn Brawn                                           AS, Ctx.CostKind);
2557ed220e15SElvis Wang   } else {
2558ed220e15SElvis Wang     TTI::OperandValueInfo OpInfo =
2559ed220e15SElvis Wang         Ctx.TTI.getOperandInfo(Ingredient.getOperand(0));
2560ed220e15SElvis Wang     Cost += Ctx.TTI.getMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment, AS,
2561edf3a55bSJohn Brawn                                     Ctx.CostKind, OpInfo, &Ingredient);
2562ed220e15SElvis Wang   }
2563ed220e15SElvis Wang   if (!Reverse)
2564ed220e15SElvis Wang     return Cost;
2565ed220e15SElvis Wang 
2566edf3a55bSJohn Brawn   return Cost +=
2567edf3a55bSJohn Brawn          Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
2568edf3a55bSJohn Brawn                                 cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
2569ed220e15SElvis Wang }
2570ed220e15SElvis Wang 
257135d3625aSFlorian Hahn void VPWidenLoadRecipe::execute(VPTransformState &State) {
257235d3625aSFlorian Hahn   auto *LI = cast<LoadInst>(&Ingredient);
257335d3625aSFlorian Hahn 
257435d3625aSFlorian Hahn   Type *ScalarDataTy = getLoadStoreType(&Ingredient);
257535d3625aSFlorian Hahn   auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
257635d3625aSFlorian Hahn   const Align Alignment = getLoadStoreAlignment(&Ingredient);
257735d3625aSFlorian Hahn   bool CreateGather = !isConsecutive();
257835d3625aSFlorian Hahn 
257935d3625aSFlorian Hahn   auto &Builder = State.Builder;
258035d3625aSFlorian Hahn   State.setDebugLocFrom(getDebugLoc());
258135d3625aSFlorian Hahn   Value *Mask = nullptr;
258235d3625aSFlorian Hahn   if (auto *VPMask = getMask()) {
258335d3625aSFlorian Hahn     // Mask reversal is only needed for non-all-one (null) masks, as reverse
258435d3625aSFlorian Hahn     // of a null all-one mask is a null mask.
258557f5d8f2SFlorian Hahn     Mask = State.get(VPMask);
258635d3625aSFlorian Hahn     if (isReverse())
258735d3625aSFlorian Hahn       Mask = Builder.CreateVectorReverse(Mask, "reverse");
258835d3625aSFlorian Hahn   }
258935d3625aSFlorian Hahn 
259057f5d8f2SFlorian Hahn   Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateGather);
259106c3a7d2SFlorian Hahn   Value *NewLI;
259235d3625aSFlorian Hahn   if (CreateGather) {
259335d3625aSFlorian Hahn     NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
259435d3625aSFlorian Hahn                                        "wide.masked.gather");
259535d3625aSFlorian Hahn   } else if (Mask) {
259606c3a7d2SFlorian Hahn     NewLI =
259706c3a7d2SFlorian Hahn         Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
259806c3a7d2SFlorian Hahn                                  PoisonValue::get(DataTy), "wide.masked.load");
259935d3625aSFlorian Hahn   } else {
260035d3625aSFlorian Hahn     NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
260135d3625aSFlorian Hahn   }
260235d3625aSFlorian Hahn   // Add metadata to the load, but setVectorValue to the reverse shuffle.
260335d3625aSFlorian Hahn   State.addMetadata(NewLI, LI);
260435d3625aSFlorian Hahn   if (Reverse)
260535d3625aSFlorian Hahn     NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
260657f5d8f2SFlorian Hahn   State.set(this, NewLI);
260735d3625aSFlorian Hahn }
260835d3625aSFlorian Hahn 
260935d3625aSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2610a9bafe91SFlorian Hahn void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
261103975b7fSFlorian Hahn                               VPSlotTracker &SlotTracker) const {
261203975b7fSFlorian Hahn   O << Indent << "WIDEN ";
2613a9bafe91SFlorian Hahn   printAsOperand(O, SlotTracker);
2614a9bafe91SFlorian Hahn   O << " = load ";
2615a9bafe91SFlorian Hahn   printOperands(O, SlotTracker);
261603975b7fSFlorian Hahn }
26171fa6c99aSFlorian Hahn #endif
261803975b7fSFlorian Hahn 
26191fa6c99aSFlorian Hahn /// Use all-true mask for reverse rather than actual mask, as it avoids a
26201fa6c99aSFlorian Hahn /// dependence w/o affecting the result.
26211fa6c99aSFlorian Hahn static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
26221fa6c99aSFlorian Hahn                                      Value *EVL, const Twine &Name) {
26231fa6c99aSFlorian Hahn   VectorType *ValTy = cast<VectorType>(Operand->getType());
26241fa6c99aSFlorian Hahn   Value *AllTrueMask =
26251fa6c99aSFlorian Hahn       Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
26261fa6c99aSFlorian Hahn   return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
26271fa6c99aSFlorian Hahn                                  {Operand, AllTrueMask, EVL}, nullptr, Name);
26281fa6c99aSFlorian Hahn }
26291fa6c99aSFlorian Hahn 
26301fa6c99aSFlorian Hahn void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
26311fa6c99aSFlorian Hahn   auto *LI = cast<LoadInst>(&Ingredient);
26321fa6c99aSFlorian Hahn 
26331fa6c99aSFlorian Hahn   Type *ScalarDataTy = getLoadStoreType(&Ingredient);
26341fa6c99aSFlorian Hahn   auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
26351fa6c99aSFlorian Hahn   const Align Alignment = getLoadStoreAlignment(&Ingredient);
26361fa6c99aSFlorian Hahn   bool CreateGather = !isConsecutive();
26371fa6c99aSFlorian Hahn 
26381fa6c99aSFlorian Hahn   auto &Builder = State.Builder;
26391fa6c99aSFlorian Hahn   State.setDebugLocFrom(getDebugLoc());
26401fa6c99aSFlorian Hahn   CallInst *NewLI;
2641aae7ac66SFlorian Hahn   Value *EVL = State.get(getEVL(), VPLane(0));
264257f5d8f2SFlorian Hahn   Value *Addr = State.get(getAddr(), !CreateGather);
26431fa6c99aSFlorian Hahn   Value *Mask = nullptr;
26441fa6c99aSFlorian Hahn   if (VPValue *VPMask = getMask()) {
264557f5d8f2SFlorian Hahn     Mask = State.get(VPMask);
26461fa6c99aSFlorian Hahn     if (isReverse())
26471fa6c99aSFlorian Hahn       Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
26481fa6c99aSFlorian Hahn   } else {
26491fa6c99aSFlorian Hahn     Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
26501fa6c99aSFlorian Hahn   }
26511fa6c99aSFlorian Hahn 
26521fa6c99aSFlorian Hahn   if (CreateGather) {
26531fa6c99aSFlorian Hahn     NewLI =
26541fa6c99aSFlorian Hahn         Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
26551fa6c99aSFlorian Hahn                                 nullptr, "wide.masked.gather");
26561fa6c99aSFlorian Hahn   } else {
26571fa6c99aSFlorian Hahn     VectorBuilder VBuilder(Builder);
26581fa6c99aSFlorian Hahn     VBuilder.setEVL(EVL).setMask(Mask);
26591fa6c99aSFlorian Hahn     NewLI = cast<CallInst>(VBuilder.createVectorInstruction(
26601fa6c99aSFlorian Hahn         Instruction::Load, DataTy, Addr, "vp.op.load"));
26611fa6c99aSFlorian Hahn   }
26621fa6c99aSFlorian Hahn   NewLI->addParamAttr(
26631fa6c99aSFlorian Hahn       0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
26641fa6c99aSFlorian Hahn   State.addMetadata(NewLI, LI);
26651fa6c99aSFlorian Hahn   Instruction *Res = NewLI;
26661fa6c99aSFlorian Hahn   if (isReverse())
26671fa6c99aSFlorian Hahn     Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
266857f5d8f2SFlorian Hahn   State.set(this, Res);
26691fa6c99aSFlorian Hahn }
26701fa6c99aSFlorian Hahn 
2671a068b974SElvis Wang InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
2672a068b974SElvis Wang                                                   VPCostContext &Ctx) const {
2673a068b974SElvis Wang   if (!Consecutive || IsMasked)
2674a068b974SElvis Wang     return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2675a068b974SElvis Wang 
2676a068b974SElvis Wang   // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost()
2677a068b974SElvis Wang   // here because the EVL recipes using EVL to replace the tail mask. But in the
2678a068b974SElvis Wang   // legacy model, it will always calculate the cost of mask.
2679a068b974SElvis Wang   // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we
2680a068b974SElvis Wang   // don't need to compare to the legacy cost model.
26819ab5474eSBenjamin Maxwell   Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
2682a068b974SElvis Wang   const Align Alignment =
2683a068b974SElvis Wang       getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
2684a068b974SElvis Wang   unsigned AS =
2685a068b974SElvis Wang       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
2686a068b974SElvis Wang   InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
2687edf3a55bSJohn Brawn       Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
2688a068b974SElvis Wang   if (!Reverse)
2689a068b974SElvis Wang     return Cost;
2690a068b974SElvis Wang 
2691a068b974SElvis Wang   return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
2692edf3a55bSJohn Brawn                                        cast<VectorType>(Ty), {}, Ctx.CostKind,
2693edf3a55bSJohn Brawn                                        0);
2694a068b974SElvis Wang }
2695a068b974SElvis Wang 
26961fa6c99aSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2697e2a72fa5SFlorian Hahn void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
2698e2a72fa5SFlorian Hahn                                  VPSlotTracker &SlotTracker) const {
2699e2a72fa5SFlorian Hahn   O << Indent << "WIDEN ";
2700e2a72fa5SFlorian Hahn   printAsOperand(O, SlotTracker);
2701e2a72fa5SFlorian Hahn   O << " = vp.load ";
2702e2a72fa5SFlorian Hahn   printOperands(O, SlotTracker);
2703e2a72fa5SFlorian Hahn }
270412763a06SFlorian Hahn #endif
2705e2a72fa5SFlorian Hahn 
270612763a06SFlorian Hahn void VPWidenStoreRecipe::execute(VPTransformState &State) {
270712763a06SFlorian Hahn   auto *SI = cast<StoreInst>(&Ingredient);
270812763a06SFlorian Hahn 
270912763a06SFlorian Hahn   VPValue *StoredVPValue = getStoredValue();
271012763a06SFlorian Hahn   bool CreateScatter = !isConsecutive();
271112763a06SFlorian Hahn   const Align Alignment = getLoadStoreAlignment(&Ingredient);
271212763a06SFlorian Hahn 
271312763a06SFlorian Hahn   auto &Builder = State.Builder;
271412763a06SFlorian Hahn   State.setDebugLocFrom(getDebugLoc());
271512763a06SFlorian Hahn 
271612763a06SFlorian Hahn   Value *Mask = nullptr;
271712763a06SFlorian Hahn   if (auto *VPMask = getMask()) {
271812763a06SFlorian Hahn     // Mask reversal is only needed for non-all-one (null) masks, as reverse
271912763a06SFlorian Hahn     // of a null all-one mask is a null mask.
272057f5d8f2SFlorian Hahn     Mask = State.get(VPMask);
272112763a06SFlorian Hahn     if (isReverse())
272212763a06SFlorian Hahn       Mask = Builder.CreateVectorReverse(Mask, "reverse");
272312763a06SFlorian Hahn   }
272412763a06SFlorian Hahn 
272557f5d8f2SFlorian Hahn   Value *StoredVal = State.get(StoredVPValue);
272612763a06SFlorian Hahn   if (isReverse()) {
272712763a06SFlorian Hahn     // If we store to reverse consecutive memory locations, then we need
272812763a06SFlorian Hahn     // to reverse the order of elements in the stored value.
272912763a06SFlorian Hahn     StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
273012763a06SFlorian Hahn     // We don't want to update the value in the map as it might be used in
273112763a06SFlorian Hahn     // another expression. So don't call resetVectorValue(StoredVal).
273212763a06SFlorian Hahn   }
273357f5d8f2SFlorian Hahn   Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
273406c3a7d2SFlorian Hahn   Instruction *NewSI = nullptr;
273512763a06SFlorian Hahn   if (CreateScatter)
273612763a06SFlorian Hahn     NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
273712763a06SFlorian Hahn   else if (Mask)
273812763a06SFlorian Hahn     NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
273912763a06SFlorian Hahn   else
274012763a06SFlorian Hahn     NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
274112763a06SFlorian Hahn   State.addMetadata(NewSI, SI);
274212763a06SFlorian Hahn }
274312763a06SFlorian Hahn 
274412763a06SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2745a9bafe91SFlorian Hahn void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
2746a9bafe91SFlorian Hahn                                VPSlotTracker &SlotTracker) const {
2747a9bafe91SFlorian Hahn   O << Indent << "WIDEN store ";
274803975b7fSFlorian Hahn   printOperands(O, SlotTracker);
274903975b7fSFlorian Hahn }
27501fa6c99aSFlorian Hahn #endif
2751e2a72fa5SFlorian Hahn 
27521fa6c99aSFlorian Hahn void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
27531fa6c99aSFlorian Hahn   auto *SI = cast<StoreInst>(&Ingredient);
27541fa6c99aSFlorian Hahn 
27551fa6c99aSFlorian Hahn   VPValue *StoredValue = getStoredValue();
27561fa6c99aSFlorian Hahn   bool CreateScatter = !isConsecutive();
27571fa6c99aSFlorian Hahn   const Align Alignment = getLoadStoreAlignment(&Ingredient);
27581fa6c99aSFlorian Hahn 
27591fa6c99aSFlorian Hahn   auto &Builder = State.Builder;
27601fa6c99aSFlorian Hahn   State.setDebugLocFrom(getDebugLoc());
27611fa6c99aSFlorian Hahn 
27621fa6c99aSFlorian Hahn   CallInst *NewSI = nullptr;
276357f5d8f2SFlorian Hahn   Value *StoredVal = State.get(StoredValue);
2764aae7ac66SFlorian Hahn   Value *EVL = State.get(getEVL(), VPLane(0));
27651fa6c99aSFlorian Hahn   if (isReverse())
27661fa6c99aSFlorian Hahn     StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
27671fa6c99aSFlorian Hahn   Value *Mask = nullptr;
27681fa6c99aSFlorian Hahn   if (VPValue *VPMask = getMask()) {
276957f5d8f2SFlorian Hahn     Mask = State.get(VPMask);
27701fa6c99aSFlorian Hahn     if (isReverse())
27711fa6c99aSFlorian Hahn       Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
27721fa6c99aSFlorian Hahn   } else {
27731fa6c99aSFlorian Hahn     Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
27741fa6c99aSFlorian Hahn   }
277557f5d8f2SFlorian Hahn   Value *Addr = State.get(getAddr(), !CreateScatter);
27761fa6c99aSFlorian Hahn   if (CreateScatter) {
27771fa6c99aSFlorian Hahn     NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
27781fa6c99aSFlorian Hahn                                     Intrinsic::vp_scatter,
27791fa6c99aSFlorian Hahn                                     {StoredVal, Addr, Mask, EVL});
27801fa6c99aSFlorian Hahn   } else {
27811fa6c99aSFlorian Hahn     VectorBuilder VBuilder(Builder);
27821fa6c99aSFlorian Hahn     VBuilder.setEVL(EVL).setMask(Mask);
27831fa6c99aSFlorian Hahn     NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
27841fa6c99aSFlorian Hahn         Instruction::Store, Type::getVoidTy(EVL->getContext()),
27851fa6c99aSFlorian Hahn         {StoredVal, Addr}));
27861fa6c99aSFlorian Hahn   }
27871fa6c99aSFlorian Hahn   NewSI->addParamAttr(
27881fa6c99aSFlorian Hahn       1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
27891fa6c99aSFlorian Hahn   State.addMetadata(NewSI, SI);
27901fa6c99aSFlorian Hahn }
27911fa6c99aSFlorian Hahn 
2792a068b974SElvis Wang InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
2793a068b974SElvis Wang                                                    VPCostContext &Ctx) const {
2794a068b974SElvis Wang   if (!Consecutive || IsMasked)
2795a068b974SElvis Wang     return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2796a068b974SElvis Wang 
2797a068b974SElvis Wang   // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost()
2798a068b974SElvis Wang   // here because the EVL recipes using EVL to replace the tail mask. But in the
2799a068b974SElvis Wang   // legacy model, it will always calculate the cost of mask.
2800a068b974SElvis Wang   // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we
2801a068b974SElvis Wang   // don't need to compare to the legacy cost model.
28029ab5474eSBenjamin Maxwell   Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
2803a068b974SElvis Wang   const Align Alignment =
2804a068b974SElvis Wang       getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
2805a068b974SElvis Wang   unsigned AS =
2806a068b974SElvis Wang       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
2807a068b974SElvis Wang   InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
2808edf3a55bSJohn Brawn       Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
2809a068b974SElvis Wang   if (!Reverse)
2810a068b974SElvis Wang     return Cost;
2811a068b974SElvis Wang 
2812a068b974SElvis Wang   return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
2813edf3a55bSJohn Brawn                                        cast<VectorType>(Ty), {}, Ctx.CostKind,
2814edf3a55bSJohn Brawn                                        0);
2815a068b974SElvis Wang }
2816a068b974SElvis Wang 
28171fa6c99aSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2818e2a72fa5SFlorian Hahn void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent,
2819e2a72fa5SFlorian Hahn                                   VPSlotTracker &SlotTracker) const {
2820e2a72fa5SFlorian Hahn   O << Indent << "WIDEN vp.store ";
2821e2a72fa5SFlorian Hahn   printOperands(O, SlotTracker);
2822e2a72fa5SFlorian Hahn }
282303975b7fSFlorian Hahn #endif
282403975b7fSFlorian Hahn 
2825a23efcc7SFlorian Hahn static Value *createBitOrPointerCast(IRBuilderBase &Builder, Value *V,
2826a23efcc7SFlorian Hahn                                      VectorType *DstVTy, const DataLayout &DL) {
2827a23efcc7SFlorian Hahn   // Verify that V is a vector type with same number of elements as DstVTy.
2828a23efcc7SFlorian Hahn   auto VF = DstVTy->getElementCount();
2829a23efcc7SFlorian Hahn   auto *SrcVecTy = cast<VectorType>(V->getType());
2830a23efcc7SFlorian Hahn   assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
2831a23efcc7SFlorian Hahn   Type *SrcElemTy = SrcVecTy->getElementType();
2832a23efcc7SFlorian Hahn   Type *DstElemTy = DstVTy->getElementType();
2833a23efcc7SFlorian Hahn   assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
2834a23efcc7SFlorian Hahn          "Vector elements must have same size");
2835a23efcc7SFlorian Hahn 
2836a23efcc7SFlorian Hahn   // Do a direct cast if element types are castable.
2837a23efcc7SFlorian Hahn   if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
2838a23efcc7SFlorian Hahn     return Builder.CreateBitOrPointerCast(V, DstVTy);
2839a23efcc7SFlorian Hahn   }
2840a23efcc7SFlorian Hahn   // V cannot be directly casted to desired vector type.
2841a23efcc7SFlorian Hahn   // May happen when V is a floating point vector but DstVTy is a vector of
2842a23efcc7SFlorian Hahn   // pointers or vice-versa. Handle this using a two-step bitcast using an
2843a23efcc7SFlorian Hahn   // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.
2844a23efcc7SFlorian Hahn   assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
2845a23efcc7SFlorian Hahn          "Only one type should be a pointer type");
2846a23efcc7SFlorian Hahn   assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
2847a23efcc7SFlorian Hahn          "Only one type should be a floating point type");
2848a23efcc7SFlorian Hahn   Type *IntTy =
2849a23efcc7SFlorian Hahn       IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
2850a23efcc7SFlorian Hahn   auto *VecIntTy = VectorType::get(IntTy, VF);
2851a23efcc7SFlorian Hahn   Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
2852a23efcc7SFlorian Hahn   return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
2853a23efcc7SFlorian Hahn }
2854a23efcc7SFlorian Hahn 
2855a23efcc7SFlorian Hahn /// Return a vector containing interleaved elements from multiple
2856a23efcc7SFlorian Hahn /// smaller input vectors.
2857a23efcc7SFlorian Hahn static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
2858a23efcc7SFlorian Hahn                                 const Twine &Name) {
2859a23efcc7SFlorian Hahn   unsigned Factor = Vals.size();
2860a23efcc7SFlorian Hahn   assert(Factor > 1 && "Tried to interleave invalid number of vectors");
2861a23efcc7SFlorian Hahn 
2862a23efcc7SFlorian Hahn   VectorType *VecTy = cast<VectorType>(Vals[0]->getType());
2863a23efcc7SFlorian Hahn #ifndef NDEBUG
2864a23efcc7SFlorian Hahn   for (Value *Val : Vals)
2865a23efcc7SFlorian Hahn     assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
2866a23efcc7SFlorian Hahn #endif
2867a23efcc7SFlorian Hahn 
2868a23efcc7SFlorian Hahn   // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
2869a23efcc7SFlorian Hahn   // must use intrinsics to interleave.
2870a23efcc7SFlorian Hahn   if (VecTy->isScalableTy()) {
28716c787ff6SFlorian Hahn     VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy);
28726c787ff6SFlorian Hahn     return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
28736c787ff6SFlorian Hahn                                    Vals,
2874a23efcc7SFlorian Hahn                                    /*FMFSource=*/nullptr, Name);
2875a23efcc7SFlorian Hahn   }
2876a23efcc7SFlorian Hahn 
2877a23efcc7SFlorian Hahn   // Fixed length. Start by concatenating all vectors into a wide vector.
2878a23efcc7SFlorian Hahn   Value *WideVec = concatenateVectors(Builder, Vals);
2879a23efcc7SFlorian Hahn 
2880a23efcc7SFlorian Hahn   // Interleave the elements into the wide vector.
2881a23efcc7SFlorian Hahn   const unsigned NumElts = VecTy->getElementCount().getFixedValue();
2882a23efcc7SFlorian Hahn   return Builder.CreateShuffleVector(
2883a23efcc7SFlorian Hahn       WideVec, createInterleaveMask(NumElts, Factor), Name);
2884a23efcc7SFlorian Hahn }
2885a23efcc7SFlorian Hahn 
2886a23efcc7SFlorian Hahn // Try to vectorize the interleave group that \p Instr belongs to.
2887a23efcc7SFlorian Hahn //
2888a23efcc7SFlorian Hahn // E.g. Translate following interleaved load group (factor = 3):
2889a23efcc7SFlorian Hahn //   for (i = 0; i < N; i+=3) {
2890a23efcc7SFlorian Hahn //     R = Pic[i];             // Member of index 0
2891a23efcc7SFlorian Hahn //     G = Pic[i+1];           // Member of index 1
2892a23efcc7SFlorian Hahn //     B = Pic[i+2];           // Member of index 2
2893a23efcc7SFlorian Hahn //     ... // do something to R, G, B
2894a23efcc7SFlorian Hahn //   }
2895a23efcc7SFlorian Hahn // To:
2896a23efcc7SFlorian Hahn //   %wide.vec = load <12 x i32>                       ; Read 4 tuples of R,G,B
2897a23efcc7SFlorian Hahn //   %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9>   ; R elements
2898a23efcc7SFlorian Hahn //   %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10>  ; G elements
2899a23efcc7SFlorian Hahn //   %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11>  ; B elements
2900a23efcc7SFlorian Hahn //
2901a23efcc7SFlorian Hahn // Or translate following interleaved store group (factor = 3):
2902a23efcc7SFlorian Hahn //   for (i = 0; i < N; i+=3) {
2903a23efcc7SFlorian Hahn //     ... do something to R, G, B
2904a23efcc7SFlorian Hahn //     Pic[i]   = R;           // Member of index 0
2905a23efcc7SFlorian Hahn //     Pic[i+1] = G;           // Member of index 1
2906a23efcc7SFlorian Hahn //     Pic[i+2] = B;           // Member of index 2
2907a23efcc7SFlorian Hahn //   }
2908a23efcc7SFlorian Hahn // To:
2909a23efcc7SFlorian Hahn //   %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
2910a23efcc7SFlorian Hahn //   %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
2911a23efcc7SFlorian Hahn //   %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
2912a23efcc7SFlorian Hahn //        <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>    ; Interleave R,G,B elements
2913a23efcc7SFlorian Hahn //   store <12 x i32> %interleaved.vec              ; Write 4 tuples of R,G,B
2914a23efcc7SFlorian Hahn void VPInterleaveRecipe::execute(VPTransformState &State) {
2915aae7ac66SFlorian Hahn   assert(!State.Lane && "Interleave group being replicated.");
2916a23efcc7SFlorian Hahn   const InterleaveGroup<Instruction> *Group = IG;
2917a23efcc7SFlorian Hahn   Instruction *Instr = Group->getInsertPos();
2918a23efcc7SFlorian Hahn 
2919a23efcc7SFlorian Hahn   // Prepare for the vector type of the interleaved load/store.
2920a23efcc7SFlorian Hahn   Type *ScalarTy = getLoadStoreType(Instr);
2921a23efcc7SFlorian Hahn   unsigned InterleaveFactor = Group->getFactor();
2922a23efcc7SFlorian Hahn   auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
2923a23efcc7SFlorian Hahn 
2924a23efcc7SFlorian Hahn   // TODO: extend the masked interleaved-group support to reversed access.
2925a23efcc7SFlorian Hahn   VPValue *BlockInMask = getMask();
2926a23efcc7SFlorian Hahn   assert((!BlockInMask || !Group->isReverse()) &&
2927a23efcc7SFlorian Hahn          "Reversed masked interleave-group not supported.");
2928a23efcc7SFlorian Hahn 
29293ec6f805SFlorian Hahn   VPValue *Addr = getAddr();
29303ec6f805SFlorian Hahn   Value *ResAddr = State.get(Addr, VPLane(0));
29313ec6f805SFlorian Hahn   if (auto *I = dyn_cast<Instruction>(ResAddr))
29323ec6f805SFlorian Hahn     State.setDebugLocFrom(I->getDebugLoc());
29333ec6f805SFlorian Hahn 
2934a23efcc7SFlorian Hahn   // If the group is reverse, adjust the index to refer to the last vector lane
2935a23efcc7SFlorian Hahn   // instead of the first. We adjust the index from the first vector lane,
2936a23efcc7SFlorian Hahn   // rather than directly getting the pointer for lane VF - 1, because the
29373fbf6f8bSFlorian Hahn   // pointer operand of the interleaved access is supposed to be uniform.
2938a23efcc7SFlorian Hahn   if (Group->isReverse()) {
2939a23efcc7SFlorian Hahn     Value *RuntimeVF =
2940a23efcc7SFlorian Hahn         getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
29413ec6f805SFlorian Hahn     Value *Index =
29423ec6f805SFlorian Hahn         State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
29437f746518SFlorian Hahn     Index = State.Builder.CreateMul(Index,
2944a23efcc7SFlorian Hahn                                     State.Builder.getInt32(Group->getFactor()));
29457f746518SFlorian Hahn     Index = State.Builder.CreateNeg(Index);
2946a23efcc7SFlorian Hahn 
2947a23efcc7SFlorian Hahn     bool InBounds = false;
29483ec6f805SFlorian Hahn     if (auto *Gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts()))
29493ec6f805SFlorian Hahn       InBounds = Gep->isInBounds();
29507f746518SFlorian Hahn     ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds);
29513ec6f805SFlorian Hahn   }
2952a23efcc7SFlorian Hahn 
2953a23efcc7SFlorian Hahn   State.setDebugLocFrom(Instr->getDebugLoc());
2954a23efcc7SFlorian Hahn   Value *PoisonVec = PoisonValue::get(VecTy);
2955a23efcc7SFlorian Hahn 
295606c3a7d2SFlorian Hahn   auto CreateGroupMask = [&BlockInMask, &State,
295706c3a7d2SFlorian Hahn                           &InterleaveFactor](Value *MaskForGaps) -> Value * {
2958a23efcc7SFlorian Hahn     if (State.VF.isScalable()) {
2959a23efcc7SFlorian Hahn       assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
29606c787ff6SFlorian Hahn       assert(InterleaveFactor == 2 &&
2961a23efcc7SFlorian Hahn              "Unsupported deinterleave factor for scalable vectors");
296257f5d8f2SFlorian Hahn       auto *ResBlockInMask = State.get(BlockInMask);
29636c787ff6SFlorian Hahn       SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask};
29646c787ff6SFlorian Hahn       auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
29656c787ff6SFlorian Hahn                                      State.VF.getKnownMinValue() * 2, true);
29666c787ff6SFlorian Hahn       return State.Builder.CreateIntrinsic(
29676c787ff6SFlorian Hahn           MaskTy, Intrinsic::vector_interleave2, Ops,
29686c787ff6SFlorian Hahn           /*FMFSource=*/nullptr, "interleaved.mask");
2969a23efcc7SFlorian Hahn     }
2970a23efcc7SFlorian Hahn 
2971a23efcc7SFlorian Hahn     if (!BlockInMask)
2972a23efcc7SFlorian Hahn       return MaskForGaps;
2973a23efcc7SFlorian Hahn 
297457f5d8f2SFlorian Hahn     Value *ResBlockInMask = State.get(BlockInMask);
2975a23efcc7SFlorian Hahn     Value *ShuffledMask = State.Builder.CreateShuffleVector(
297606c3a7d2SFlorian Hahn         ResBlockInMask,
2977a23efcc7SFlorian Hahn         createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
2978a23efcc7SFlorian Hahn         "interleaved.mask");
2979a23efcc7SFlorian Hahn     return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
2980a23efcc7SFlorian Hahn                                                    ShuffledMask, MaskForGaps)
2981a23efcc7SFlorian Hahn                        : ShuffledMask;
2982a23efcc7SFlorian Hahn   };
2983a23efcc7SFlorian Hahn 
2984a23efcc7SFlorian Hahn   const DataLayout &DL = Instr->getDataLayout();
2985a23efcc7SFlorian Hahn   // Vectorize the interleaved load group.
2986a23efcc7SFlorian Hahn   if (isa<LoadInst>(Instr)) {
2987a23efcc7SFlorian Hahn     Value *MaskForGaps = nullptr;
2988a23efcc7SFlorian Hahn     if (NeedsMaskForGaps) {
2989a23efcc7SFlorian Hahn       MaskForGaps = createBitMaskForGaps(State.Builder,
2990a23efcc7SFlorian Hahn                                          State.VF.getKnownMinValue(), *Group);
2991a23efcc7SFlorian Hahn       assert(MaskForGaps && "Mask for Gaps is required but it is null");
2992a23efcc7SFlorian Hahn     }
2993a23efcc7SFlorian Hahn 
2994a23efcc7SFlorian Hahn     Instruction *NewLoad;
2995a23efcc7SFlorian Hahn     if (BlockInMask || MaskForGaps) {
299606c3a7d2SFlorian Hahn       Value *GroupMask = CreateGroupMask(MaskForGaps);
299706c3a7d2SFlorian Hahn       NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
2998a23efcc7SFlorian Hahn                                                Group->getAlign(), GroupMask,
2999a23efcc7SFlorian Hahn                                                PoisonVec, "wide.masked.vec");
3000a23efcc7SFlorian Hahn     } else
300106c3a7d2SFlorian Hahn       NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
300206c3a7d2SFlorian Hahn                                                 Group->getAlign(), "wide.vec");
3003a23efcc7SFlorian Hahn     Group->addMetadata(NewLoad);
3004a23efcc7SFlorian Hahn 
3005a23efcc7SFlorian Hahn     ArrayRef<VPValue *> VPDefs = definedValues();
3006a23efcc7SFlorian Hahn     const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
3007a23efcc7SFlorian Hahn     if (VecTy->isScalableTy()) {
30086c787ff6SFlorian Hahn       assert(InterleaveFactor == 2 &&
3009a23efcc7SFlorian Hahn              "Unsupported deinterleave factor for scalable vectors");
3010a23efcc7SFlorian Hahn 
3011a23efcc7SFlorian Hahn         // Scalable vectors cannot use arbitrary shufflevectors (only splats),
3012a23efcc7SFlorian Hahn         // so must use intrinsics to deinterleave.
30136c787ff6SFlorian Hahn       Value *DI = State.Builder.CreateIntrinsic(
30146c787ff6SFlorian Hahn           Intrinsic::vector_deinterleave2, VecTy, NewLoad,
3015a23efcc7SFlorian Hahn           /*FMFSource=*/nullptr, "strided.vec");
30166c787ff6SFlorian Hahn       unsigned J = 0;
30176c787ff6SFlorian Hahn       for (unsigned I = 0; I < InterleaveFactor; ++I) {
3018a23efcc7SFlorian Hahn         Instruction *Member = Group->getMember(I);
30196c787ff6SFlorian Hahn 
30206c787ff6SFlorian Hahn         if (!Member)
3021a23efcc7SFlorian Hahn           continue;
30226c787ff6SFlorian Hahn 
30236c787ff6SFlorian Hahn         Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
3024a23efcc7SFlorian Hahn         // If this member has different type, cast the result type.
3025a23efcc7SFlorian Hahn         if (Member->getType() != ScalarTy) {
3026a23efcc7SFlorian Hahn           VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
3027a23efcc7SFlorian Hahn           StridedVec =
3028a23efcc7SFlorian Hahn               createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
3029a23efcc7SFlorian Hahn         }
3030a23efcc7SFlorian Hahn 
3031a23efcc7SFlorian Hahn         if (Group->isReverse())
303206c3a7d2SFlorian Hahn           StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
3033a23efcc7SFlorian Hahn 
303457f5d8f2SFlorian Hahn         State.set(VPDefs[J], StridedVec);
3035a23efcc7SFlorian Hahn         ++J;
3036a23efcc7SFlorian Hahn       }
3037a23efcc7SFlorian Hahn 
3038a23efcc7SFlorian Hahn       return;
3039a23efcc7SFlorian Hahn     }
3040a23efcc7SFlorian Hahn 
3041a23efcc7SFlorian Hahn     // For each member in the group, shuffle out the appropriate data from the
3042a23efcc7SFlorian Hahn     // wide loads.
3043a23efcc7SFlorian Hahn     unsigned J = 0;
3044a23efcc7SFlorian Hahn     for (unsigned I = 0; I < InterleaveFactor; ++I) {
3045a23efcc7SFlorian Hahn       Instruction *Member = Group->getMember(I);
3046a23efcc7SFlorian Hahn 
3047a23efcc7SFlorian Hahn       // Skip the gaps in the group.
3048a23efcc7SFlorian Hahn       if (!Member)
3049a23efcc7SFlorian Hahn         continue;
3050a23efcc7SFlorian Hahn 
3051a23efcc7SFlorian Hahn       auto StrideMask =
3052a23efcc7SFlorian Hahn           createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
305306c3a7d2SFlorian Hahn       Value *StridedVec =
305406c3a7d2SFlorian Hahn           State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec");
3055a23efcc7SFlorian Hahn 
3056a23efcc7SFlorian Hahn       // If this member has different type, cast the result type.
3057a23efcc7SFlorian Hahn       if (Member->getType() != ScalarTy) {
3058a23efcc7SFlorian Hahn         assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
3059a23efcc7SFlorian Hahn         VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
3060a23efcc7SFlorian Hahn         StridedVec =
3061a23efcc7SFlorian Hahn             createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
3062a23efcc7SFlorian Hahn       }
3063a23efcc7SFlorian Hahn 
3064a23efcc7SFlorian Hahn       if (Group->isReverse())
3065a23efcc7SFlorian Hahn         StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
3066a23efcc7SFlorian Hahn 
306757f5d8f2SFlorian Hahn       State.set(VPDefs[J], StridedVec);
3068a23efcc7SFlorian Hahn       ++J;
3069a23efcc7SFlorian Hahn     }
3070a23efcc7SFlorian Hahn     return;
3071a23efcc7SFlorian Hahn   }
3072a23efcc7SFlorian Hahn 
3073a23efcc7SFlorian Hahn   // The sub vector type for current instruction.
3074a23efcc7SFlorian Hahn   auto *SubVT = VectorType::get(ScalarTy, State.VF);
3075a23efcc7SFlorian Hahn 
3076a23efcc7SFlorian Hahn   // Vectorize the interleaved store group.
3077a23efcc7SFlorian Hahn   Value *MaskForGaps =
3078a23efcc7SFlorian Hahn       createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
3079a23efcc7SFlorian Hahn   assert((!MaskForGaps || !State.VF.isScalable()) &&
3080a23efcc7SFlorian Hahn          "masking gaps for scalable vectors is not yet supported.");
3081a23efcc7SFlorian Hahn   ArrayRef<VPValue *> StoredValues = getStoredValues();
3082a23efcc7SFlorian Hahn   // Collect the stored vector from each member.
3083a23efcc7SFlorian Hahn   SmallVector<Value *, 4> StoredVecs;
3084a23efcc7SFlorian Hahn   unsigned StoredIdx = 0;
3085a23efcc7SFlorian Hahn   for (unsigned i = 0; i < InterleaveFactor; i++) {
3086a23efcc7SFlorian Hahn     assert((Group->getMember(i) || MaskForGaps) &&
3087a23efcc7SFlorian Hahn            "Fail to get a member from an interleaved store group");
3088a23efcc7SFlorian Hahn     Instruction *Member = Group->getMember(i);
3089a23efcc7SFlorian Hahn 
3090a23efcc7SFlorian Hahn     // Skip the gaps in the group.
3091a23efcc7SFlorian Hahn     if (!Member) {
3092a23efcc7SFlorian Hahn       Value *Undef = PoisonValue::get(SubVT);
3093a23efcc7SFlorian Hahn       StoredVecs.push_back(Undef);
3094a23efcc7SFlorian Hahn       continue;
3095a23efcc7SFlorian Hahn     }
3096a23efcc7SFlorian Hahn 
309757f5d8f2SFlorian Hahn     Value *StoredVec = State.get(StoredValues[StoredIdx]);
3098a23efcc7SFlorian Hahn     ++StoredIdx;
3099a23efcc7SFlorian Hahn 
3100a23efcc7SFlorian Hahn     if (Group->isReverse())
3101a23efcc7SFlorian Hahn       StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
3102a23efcc7SFlorian Hahn 
3103a23efcc7SFlorian Hahn     // If this member has different type, cast it to a unified type.
3104a23efcc7SFlorian Hahn 
3105a23efcc7SFlorian Hahn     if (StoredVec->getType() != SubVT)
3106a23efcc7SFlorian Hahn       StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
3107a23efcc7SFlorian Hahn 
3108a23efcc7SFlorian Hahn     StoredVecs.push_back(StoredVec);
3109a23efcc7SFlorian Hahn   }
3110a23efcc7SFlorian Hahn 
3111a23efcc7SFlorian Hahn   // Interleave all the smaller vectors into one wider vector.
311206c3a7d2SFlorian Hahn   Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
3113a23efcc7SFlorian Hahn   Instruction *NewStoreInstr;
3114a23efcc7SFlorian Hahn   if (BlockInMask || MaskForGaps) {
311506c3a7d2SFlorian Hahn     Value *GroupMask = CreateGroupMask(MaskForGaps);
3116a23efcc7SFlorian Hahn     NewStoreInstr = State.Builder.CreateMaskedStore(
311706c3a7d2SFlorian Hahn         IVec, ResAddr, Group->getAlign(), GroupMask);
3118a23efcc7SFlorian Hahn   } else
311906c3a7d2SFlorian Hahn     NewStoreInstr =
312006c3a7d2SFlorian Hahn         State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());
3121a23efcc7SFlorian Hahn 
3122a23efcc7SFlorian Hahn   Group->addMetadata(NewStoreInstr);
3123a23efcc7SFlorian Hahn }
3124a23efcc7SFlorian Hahn 
3125a23efcc7SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3126a23efcc7SFlorian Hahn void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
3127a23efcc7SFlorian Hahn                                VPSlotTracker &SlotTracker) const {
3128a23efcc7SFlorian Hahn   O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
3129a23efcc7SFlorian Hahn   IG->getInsertPos()->printAsOperand(O, false);
3130a23efcc7SFlorian Hahn   O << ", ";
3131a23efcc7SFlorian Hahn   getAddr()->printAsOperand(O, SlotTracker);
3132a23efcc7SFlorian Hahn   VPValue *Mask = getMask();
3133a23efcc7SFlorian Hahn   if (Mask) {
3134a23efcc7SFlorian Hahn     O << ", ";
3135a23efcc7SFlorian Hahn     Mask->printAsOperand(O, SlotTracker);
3136a23efcc7SFlorian Hahn   }
3137a23efcc7SFlorian Hahn 
3138a23efcc7SFlorian Hahn   unsigned OpIdx = 0;
3139a23efcc7SFlorian Hahn   for (unsigned i = 0; i < IG->getFactor(); ++i) {
3140a23efcc7SFlorian Hahn     if (!IG->getMember(i))
3141a23efcc7SFlorian Hahn       continue;
3142a23efcc7SFlorian Hahn     if (getNumStoreOperands() > 0) {
3143a23efcc7SFlorian Hahn       O << "\n" << Indent << "  store ";
3144a23efcc7SFlorian Hahn       getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);
3145a23efcc7SFlorian Hahn       O << " to index " << i;
3146a23efcc7SFlorian Hahn     } else {
3147a23efcc7SFlorian Hahn       O << "\n" << Indent << "  ";
3148a23efcc7SFlorian Hahn       getVPValue(OpIdx)->printAsOperand(O, SlotTracker);
3149a23efcc7SFlorian Hahn       O << " = load from index " << i;
3150a23efcc7SFlorian Hahn     }
3151a23efcc7SFlorian Hahn     ++OpIdx;
3152a23efcc7SFlorian Hahn   }
3153a23efcc7SFlorian Hahn }
3154a23efcc7SFlorian Hahn #endif
3155a23efcc7SFlorian Hahn 
3156fa3258ecSFlorian Hahn InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF,
3157fa3258ecSFlorian Hahn                                                 VPCostContext &Ctx) const {
31582a6b09e0SFlorian Hahn   Instruction *InsertPos = getInsertPos();
31592a6b09e0SFlorian Hahn   // Find the VPValue index of the interleave group. We need to skip gaps.
31602a6b09e0SFlorian Hahn   unsigned InsertPosIdx = 0;
31612a6b09e0SFlorian Hahn   for (unsigned Idx = 0; IG->getFactor(); ++Idx)
31622a6b09e0SFlorian Hahn     if (auto *Member = IG->getMember(Idx)) {
31632a6b09e0SFlorian Hahn       if (Member == InsertPos)
31642a6b09e0SFlorian Hahn         break;
31652a6b09e0SFlorian Hahn       InsertPosIdx++;
31662a6b09e0SFlorian Hahn     }
31672a46e5d0SFlorian Hahn   Type *ValTy = Ctx.Types.inferScalarType(
31682a6b09e0SFlorian Hahn       getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)
31692a6b09e0SFlorian Hahn                                 : getStoredValues()[InsertPosIdx]);
31709ab5474eSBenjamin Maxwell   auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
31712a6b09e0SFlorian Hahn   unsigned AS = getLoadStoreAddressSpace(InsertPos);
31722a46e5d0SFlorian Hahn 
31732a46e5d0SFlorian Hahn   unsigned InterleaveFactor = IG->getFactor();
31742a46e5d0SFlorian Hahn   auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
31752a46e5d0SFlorian Hahn 
31762a46e5d0SFlorian Hahn   // Holds the indices of existing members in the interleaved group.
31772a46e5d0SFlorian Hahn   SmallVector<unsigned, 4> Indices;
31782a46e5d0SFlorian Hahn   for (unsigned IF = 0; IF < InterleaveFactor; IF++)
31792a46e5d0SFlorian Hahn     if (IG->getMember(IF))
31802a46e5d0SFlorian Hahn       Indices.push_back(IF);
31812a46e5d0SFlorian Hahn 
31822a46e5d0SFlorian Hahn   // Calculate the cost of the whole interleaved group.
31832a46e5d0SFlorian Hahn   InstructionCost Cost = Ctx.TTI.getInterleavedMemoryOpCost(
31842a6b09e0SFlorian Hahn       InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices,
3185edf3a55bSJohn Brawn       IG->getAlign(), AS, Ctx.CostKind, getMask(), NeedsMaskForGaps);
31862a46e5d0SFlorian Hahn 
31872a46e5d0SFlorian Hahn   if (!IG->isReverse())
31882a46e5d0SFlorian Hahn     return Cost;
31892a46e5d0SFlorian Hahn 
31902a46e5d0SFlorian Hahn   return Cost + IG->getNumMembers() *
31912a46e5d0SFlorian Hahn                     Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
3192edf3a55bSJohn Brawn                                            VectorTy, std::nullopt, Ctx.CostKind,
3193edf3a55bSJohn Brawn                                            0);
3194fa3258ecSFlorian Hahn }
3195fa3258ecSFlorian Hahn 
319603975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
319703975b7fSFlorian Hahn void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
319803975b7fSFlorian Hahn                                    VPSlotTracker &SlotTracker) const {
319903975b7fSFlorian Hahn   O << Indent << "EMIT ";
320003975b7fSFlorian Hahn   printAsOperand(O, SlotTracker);
320103975b7fSFlorian Hahn   O << " = CANONICAL-INDUCTION ";
3202f7a8a78cSFlorian Hahn   printOperands(O, SlotTracker);
320303975b7fSFlorian Hahn }
320403975b7fSFlorian Hahn #endif
320503975b7fSFlorian Hahn 
32062906f362SFlorian Hahn bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
32072c692d89SFlorian Hahn   return IsScalarAfterVectorization &&
32082906f362SFlorian Hahn          (!IsScalable || vputils::onlyFirstLaneUsed(this));
320903975b7fSFlorian Hahn }
321003975b7fSFlorian Hahn 
3211241349ffSFlorian Hahn void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
321295e509a9SFlorian Hahn   assert(getInductionDescriptor().getKind() ==
321395e509a9SFlorian Hahn              InductionDescriptor::IK_PtrInduction &&
3214241349ffSFlorian Hahn          "Not a pointer induction according to InductionDescriptor!");
3215241349ffSFlorian Hahn   assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
3216241349ffSFlorian Hahn          "Unexpected type.");
3217241349ffSFlorian Hahn   assert(!onlyScalarsGenerated(State.VF.isScalable()) &&
3218241349ffSFlorian Hahn          "Recipe should have been replaced");
3219241349ffSFlorian Hahn 
32208ec40675SFlorian Hahn   unsigned CurrentPart = getUnrollPart(*this);
3221241349ffSFlorian Hahn 
3222241349ffSFlorian Hahn   // Build a pointer phi
3223241349ffSFlorian Hahn   Value *ScalarStartValue = getStartValue()->getLiveInIRValue();
3224241349ffSFlorian Hahn   Type *ScStValueType = ScalarStartValue->getType();
3225241349ffSFlorian Hahn 
3226241349ffSFlorian Hahn   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
32278ec40675SFlorian Hahn   PHINode *NewPointerPhi = nullptr;
32288ec40675SFlorian Hahn   if (CurrentPart == 0) {
3229a7fda0e1SFlorian Hahn     auto *IVR = cast<VPHeaderPHIRecipe>(&getParent()
3230a7fda0e1SFlorian Hahn                                              ->getPlan()
3231a7fda0e1SFlorian Hahn                                              ->getVectorLoopRegion()
3232a7fda0e1SFlorian Hahn                                              ->getEntryBasicBlock()
3233a7fda0e1SFlorian Hahn                                              ->front());
3234a7fda0e1SFlorian Hahn     PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, /*IsScalar*/ true));
32358ec40675SFlorian Hahn     NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi",
32368ec40675SFlorian Hahn                                     CanonicalIV->getIterator());
3237241349ffSFlorian Hahn     NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
32382067e604SFlorian Hahn     NewPointerPhi->setDebugLoc(getDebugLoc());
32398ec40675SFlorian Hahn   } else {
32408ec40675SFlorian Hahn     // The recipe has been unrolled. In that case, fetch the single pointer phi
32418ec40675SFlorian Hahn     // shared among all unrolled parts of the recipe.
32428ec40675SFlorian Hahn     auto *GEP =
324357f5d8f2SFlorian Hahn         cast<GetElementPtrInst>(State.get(getFirstUnrolledPartOperand()));
32448ec40675SFlorian Hahn     NewPointerPhi = cast<PHINode>(GEP->getPointerOperand());
32458ec40675SFlorian Hahn   }
3246241349ffSFlorian Hahn 
3247241349ffSFlorian Hahn   // A pointer induction, performed by using a gep
3248241349ffSFlorian Hahn   BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
324995e509a9SFlorian Hahn   Value *ScalarStepValue = State.get(getStepValue(), VPLane(0));
325095e509a9SFlorian Hahn   Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue());
3251241349ffSFlorian Hahn   Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
3252241349ffSFlorian Hahn   // Add induction update using an incorrect block temporarily. The phi node
3253241349ffSFlorian Hahn   // will be fixed after VPlan execution. Note that at this point the latch
3254241349ffSFlorian Hahn   // block cannot be used, as it does not exist yet.
3255241349ffSFlorian Hahn   // TODO: Model increment value in VPlan, by turning the recipe into a
3256241349ffSFlorian Hahn   // multi-def and a subclass of VPHeaderPHIRecipe.
32578ec40675SFlorian Hahn   if (CurrentPart == 0) {
32588ec40675SFlorian Hahn     // The recipe represents the first part of the pointer induction. Create the
32598ec40675SFlorian Hahn     // GEP to increment the phi across all unrolled parts.
32608ec40675SFlorian Hahn     unsigned UF = CurrentPart == 0 ? getParent()->getPlan()->getUF() : 1;
32618ec40675SFlorian Hahn     Value *NumUnrolledElems =
32628ec40675SFlorian Hahn         State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, UF));
32638ec40675SFlorian Hahn 
32648ec40675SFlorian Hahn     Value *InductionGEP = GetElementPtrInst::Create(
32658ec40675SFlorian Hahn         State.Builder.getInt8Ty(), NewPointerPhi,
32668ec40675SFlorian Hahn         State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
32678ec40675SFlorian Hahn         InductionLoc);
32688ec40675SFlorian Hahn 
3269241349ffSFlorian Hahn     NewPointerPhi->addIncoming(InductionGEP, VectorPH);
32708ec40675SFlorian Hahn   }
3271241349ffSFlorian Hahn 
327206c3a7d2SFlorian Hahn   // Create actual address geps that use the pointer phi as base and a
327306c3a7d2SFlorian Hahn   // vectorized version of the step value (<step*0, ..., step*N>) as offset.
3274241349ffSFlorian Hahn   Type *VecPhiType = VectorType::get(PhiType, State.VF);
32758ec40675SFlorian Hahn   Value *StartOffsetScalar = State.Builder.CreateMul(
32768ec40675SFlorian Hahn       RuntimeVF, ConstantInt::get(PhiType, CurrentPart));
3277241349ffSFlorian Hahn   Value *StartOffset =
3278241349ffSFlorian Hahn       State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
3279241349ffSFlorian Hahn   // Create a vector of consecutive numbers from zero to VF.
3280241349ffSFlorian Hahn   StartOffset = State.Builder.CreateAdd(
3281241349ffSFlorian Hahn       StartOffset, State.Builder.CreateStepVector(VecPhiType));
3282241349ffSFlorian Hahn 
3283aae7ac66SFlorian Hahn   assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) &&
3284241349ffSFlorian Hahn          "scalar step must be the same across all parts");
3285241349ffSFlorian Hahn   Value *GEP = State.Builder.CreateGEP(
3286241349ffSFlorian Hahn       State.Builder.getInt8Ty(), NewPointerPhi,
3287ffcff2f4SShih-Po Hung       State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(
3288ffcff2f4SShih-Po Hung                                                State.VF, ScalarStepValue)),
3289ffcff2f4SShih-Po Hung       "vector.gep");
329057f5d8f2SFlorian Hahn   State.set(this, GEP);
3291241349ffSFlorian Hahn }
3292241349ffSFlorian Hahn 
329303975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
329403975b7fSFlorian Hahn void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
329503975b7fSFlorian Hahn                                           VPSlotTracker &SlotTracker) const {
32968ec40675SFlorian Hahn   assert((getNumOperands() == 2 || getNumOperands() == 4) &&
32978ec40675SFlorian Hahn          "unexpected number of operands");
329803975b7fSFlorian Hahn   O << Indent << "EMIT ";
329903975b7fSFlorian Hahn   printAsOperand(O, SlotTracker);
330003975b7fSFlorian Hahn   O << " = WIDEN-POINTER-INDUCTION ";
330103975b7fSFlorian Hahn   getStartValue()->printAsOperand(O, SlotTracker);
3302e64650d7SFlorian Hahn   O << ", ";
330395e509a9SFlorian Hahn   getStepValue()->printAsOperand(O, SlotTracker);
33048ec40675SFlorian Hahn   if (getNumOperands() == 4) {
33058ec40675SFlorian Hahn     O << ", ";
33068ec40675SFlorian Hahn     getOperand(2)->printAsOperand(O, SlotTracker);
33078ec40675SFlorian Hahn     O << ", ";
33088ec40675SFlorian Hahn     getOperand(3)->printAsOperand(O, SlotTracker);
33098ec40675SFlorian Hahn   }
331003975b7fSFlorian Hahn }
331103975b7fSFlorian Hahn #endif
331203975b7fSFlorian Hahn 
331303975b7fSFlorian Hahn void VPExpandSCEVRecipe::execute(VPTransformState &State) {
3314aae7ac66SFlorian Hahn   assert(!State.Lane && "cannot be used in per-lane");
33156c8f41d3SFlorian Hahn   if (State.ExpandedSCEVs.contains(Expr)) {
33166c8f41d3SFlorian Hahn     // SCEV Expr has already been expanded, result must already be set. At the
33176c8f41d3SFlorian Hahn     // moment we have to execute the entry block twice (once before skeleton
33186c8f41d3SFlorian Hahn     // creation to get expanded SCEVs used by the skeleton and once during
33196c8f41d3SFlorian Hahn     // regular VPlan execution).
33206c8f41d3SFlorian Hahn     State.Builder.SetInsertPoint(State.CFG.VPBB2IRBB[getParent()]);
33216c8f41d3SFlorian Hahn     assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] &&
33226c8f41d3SFlorian Hahn            "Results must match");
33236c8f41d3SFlorian Hahn     return;
33246c8f41d3SFlorian Hahn   }
33256c8f41d3SFlorian Hahn 
33262d209d96SNikita Popov   const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
332703975b7fSFlorian Hahn   SCEVExpander Exp(SE, DL, "induction");
332803975b7fSFlorian Hahn 
332903975b7fSFlorian Hahn   Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
333003975b7fSFlorian Hahn                                  &*State.Builder.GetInsertPoint());
3331236a0e82SFlorian Hahn   State.ExpandedSCEVs[Expr] = Res;
3332aae7ac66SFlorian Hahn   State.set(this, Res, VPLane(0));
333303975b7fSFlorian Hahn }
333403975b7fSFlorian Hahn 
333503975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
333603975b7fSFlorian Hahn void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
333703975b7fSFlorian Hahn                                VPSlotTracker &SlotTracker) const {
333803975b7fSFlorian Hahn   O << Indent << "EMIT ";
33393829fd75SFlorian Hahn   printAsOperand(O, SlotTracker);
334003975b7fSFlorian Hahn   O << " = EXPAND SCEV " << *Expr;
334103975b7fSFlorian Hahn }
334203975b7fSFlorian Hahn #endif
334303975b7fSFlorian Hahn 
334403975b7fSFlorian Hahn void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
334557f5d8f2SFlorian Hahn   Value *CanonicalIV = State.get(getOperand(0), /*IsScalar*/ true);
334603975b7fSFlorian Hahn   Type *STy = CanonicalIV->getType();
334703975b7fSFlorian Hahn   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
334803975b7fSFlorian Hahn   ElementCount VF = State.VF;
334903975b7fSFlorian Hahn   Value *VStart = VF.isScalar()
335003975b7fSFlorian Hahn                       ? CanonicalIV
335103975b7fSFlorian Hahn                       : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
33528ec40675SFlorian Hahn   Value *VStep = createStepForVF(Builder, STy, VF, getUnrollPart(*this));
335303975b7fSFlorian Hahn   if (VF.isVector()) {
335403975b7fSFlorian Hahn     VStep = Builder.CreateVectorSplat(VF, VStep);
335503975b7fSFlorian Hahn     VStep =
335603975b7fSFlorian Hahn         Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
335703975b7fSFlorian Hahn   }
335803975b7fSFlorian Hahn   Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
335957f5d8f2SFlorian Hahn   State.set(this, CanonicalVectorIV);
336003975b7fSFlorian Hahn }
336103975b7fSFlorian Hahn 
336203975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
336303975b7fSFlorian Hahn void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
336403975b7fSFlorian Hahn                                      VPSlotTracker &SlotTracker) const {
336503975b7fSFlorian Hahn   O << Indent << "EMIT ";
336603975b7fSFlorian Hahn   printAsOperand(O, SlotTracker);
336703975b7fSFlorian Hahn   O << " = WIDEN-CANONICAL-INDUCTION ";
336803975b7fSFlorian Hahn   printOperands(O, SlotTracker);
336903975b7fSFlorian Hahn }
337003975b7fSFlorian Hahn #endif
337103975b7fSFlorian Hahn 
337203975b7fSFlorian Hahn void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
337303975b7fSFlorian Hahn   auto &Builder = State.Builder;
337403975b7fSFlorian Hahn   // Create a vector from the initial value.
337503975b7fSFlorian Hahn   auto *VectorInit = getStartValue()->getLiveInIRValue();
337603975b7fSFlorian Hahn 
337703975b7fSFlorian Hahn   Type *VecTy = State.VF.isScalar()
337803975b7fSFlorian Hahn                     ? VectorInit->getType()
337903975b7fSFlorian Hahn                     : VectorType::get(VectorInit->getType(), State.VF);
338003975b7fSFlorian Hahn 
338103975b7fSFlorian Hahn   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
338203975b7fSFlorian Hahn   if (State.VF.isVector()) {
338303975b7fSFlorian Hahn     auto *IdxTy = Builder.getInt32Ty();
338403975b7fSFlorian Hahn     auto *One = ConstantInt::get(IdxTy, 1);
338503975b7fSFlorian Hahn     IRBuilder<>::InsertPointGuard Guard(Builder);
338603975b7fSFlorian Hahn     Builder.SetInsertPoint(VectorPH->getTerminator());
338703975b7fSFlorian Hahn     auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
338803975b7fSFlorian Hahn     auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
338903975b7fSFlorian Hahn     VectorInit = Builder.CreateInsertElement(
339003975b7fSFlorian Hahn         PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
339103975b7fSFlorian Hahn   }
339203975b7fSFlorian Hahn 
339303975b7fSFlorian Hahn   // Create a phi node for the new recurrence.
339406c3a7d2SFlorian Hahn   PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");
339506c3a7d2SFlorian Hahn   Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
339606c3a7d2SFlorian Hahn   Phi->addIncoming(VectorInit, VectorPH);
339757f5d8f2SFlorian Hahn   State.set(this, Phi);
339803975b7fSFlorian Hahn }
339903975b7fSFlorian Hahn 
3400680901edSFlorian Hahn InstructionCost
3401680901edSFlorian Hahn VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
3402680901edSFlorian Hahn                                              VPCostContext &Ctx) const {
340332003857SFlorian Hahn   if (VF.isScalar())
3404edf3a55bSJohn Brawn     return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
340532003857SFlorian Hahn 
3406680901edSFlorian Hahn   if (VF.isScalable() && VF.getKnownMinValue() == 1)
3407680901edSFlorian Hahn     return InstructionCost::getInvalid();
3408680901edSFlorian Hahn 
3409680901edSFlorian Hahn   SmallVector<int> Mask(VF.getKnownMinValue());
3410680901edSFlorian Hahn   std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
3411680901edSFlorian Hahn   Type *VectorTy =
34129ab5474eSBenjamin Maxwell       toVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
3413680901edSFlorian Hahn 
3414680901edSFlorian Hahn   return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
3415edf3a55bSJohn Brawn                                 cast<VectorType>(VectorTy), Mask, Ctx.CostKind,
3416680901edSFlorian Hahn                                 VF.getKnownMinValue() - 1);
3417680901edSFlorian Hahn }
3418680901edSFlorian Hahn 
341903975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
342003975b7fSFlorian Hahn void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
342103975b7fSFlorian Hahn                                             VPSlotTracker &SlotTracker) const {
342203975b7fSFlorian Hahn   O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
342303975b7fSFlorian Hahn   printAsOperand(O, SlotTracker);
342403975b7fSFlorian Hahn   O << " = phi ";
342503975b7fSFlorian Hahn   printOperands(O, SlotTracker);
342603975b7fSFlorian Hahn }
342703975b7fSFlorian Hahn #endif
342803975b7fSFlorian Hahn 
342903975b7fSFlorian Hahn void VPReductionPHIRecipe::execute(VPTransformState &State) {
343003975b7fSFlorian Hahn   auto &Builder = State.Builder;
343103975b7fSFlorian Hahn 
3432795e35a6SSam Tebbs   // If this phi is fed by a scaled reduction then it should output a
3433795e35a6SSam Tebbs   // vector with fewer elements than the VF.
3434795e35a6SSam Tebbs   ElementCount VF = State.VF.divideCoefficientBy(VFScaleFactor);
3435795e35a6SSam Tebbs 
34366011d6b2SFlorian Hahn   // Reductions do not have to start at zero. They can start with
34376011d6b2SFlorian Hahn   // any loop invariant values.
34386011d6b2SFlorian Hahn   VPValue *StartVPV = getStartValue();
34396011d6b2SFlorian Hahn   Value *StartV = StartVPV->getLiveInIRValue();
34406011d6b2SFlorian Hahn 
344103975b7fSFlorian Hahn   // In order to support recurrences we need to be able to vectorize Phi nodes.
344203975b7fSFlorian Hahn   // Phi nodes have cycles, so we need to vectorize them in two stages. This is
344303975b7fSFlorian Hahn   // stage #1: We create a new vector PHI node with no incoming edges. We'll use
344403975b7fSFlorian Hahn   // this value when we vectorize all of the instructions that use the PHI.
34455f096fd2SFlorian Hahn   bool ScalarPHI = State.VF.isScalar() || IsInLoop;
3446795e35a6SSam Tebbs   Type *VecTy =
3447795e35a6SSam Tebbs       ScalarPHI ? StartV->getType() : VectorType::get(StartV->getType(), VF);
344803975b7fSFlorian Hahn 
344903975b7fSFlorian Hahn   BasicBlock *HeaderBB = State.CFG.PrevBB;
3450b06a45c6SFlorian Hahn   assert(State.CurrentParentLoop->getHeader() == HeaderBB &&
345103975b7fSFlorian Hahn          "recipe must be in the vector loop header");
345206c3a7d2SFlorian Hahn   auto *Phi = PHINode::Create(VecTy, 2, "vec.phi");
345306c3a7d2SFlorian Hahn   Phi->insertBefore(HeaderBB->getFirstInsertionPt());
345457f5d8f2SFlorian Hahn   State.set(this, Phi, IsInLoop);
345503975b7fSFlorian Hahn 
345603975b7fSFlorian Hahn   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
345703975b7fSFlorian Hahn 
345803975b7fSFlorian Hahn   Value *Iden = nullptr;
345903975b7fSFlorian Hahn   RecurKind RK = RdxDesc.getRecurrenceKind();
34608ec40675SFlorian Hahn   unsigned CurrentPart = getUnrollPart(*this);
34618ec40675SFlorian Hahn 
346203975b7fSFlorian Hahn   if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
3463425e9e81SMel Chen       RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
3464425e9e81SMel Chen     // MinMax and AnyOf reductions have the start value as their identity.
346503975b7fSFlorian Hahn     if (ScalarPHI) {
346603975b7fSFlorian Hahn       Iden = StartV;
346703975b7fSFlorian Hahn     } else {
346803975b7fSFlorian Hahn       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
346903975b7fSFlorian Hahn       Builder.SetInsertPoint(VectorPH->getTerminator());
347057f5d8f2SFlorian Hahn       StartV = Iden = State.get(StartVPV);
347103975b7fSFlorian Hahn     }
3472b3cba9beSMel Chen   } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
3473b3cba9beSMel Chen     // [I|F]FindLastIV will use a sentinel value to initialize the reduction
34740e528ac4SFlorian Hahn     // phi or the resume value from the main vector loop when vectorizing the
34750e528ac4SFlorian Hahn     // epilogue loop. In the exit block, ComputeReductionResult will generate
34760e528ac4SFlorian Hahn     // checks to verify if the reduction result is the sentinel value. If the
34770e528ac4SFlorian Hahn     // result is the sentinel value, it will be corrected back to the start
34780e528ac4SFlorian Hahn     // value.
3479b3cba9beSMel Chen     // TODO: The sentinel value is not always necessary. When the start value is
3480b3cba9beSMel Chen     // a constant, and smaller than the start value of the induction variable,
3481b3cba9beSMel Chen     // the start value can be directly used to initialize the reduction phi.
3482eb59fe8dSFlorian Hahn     Iden = StartV;
3483b3cba9beSMel Chen     if (!ScalarPHI) {
3484b3cba9beSMel Chen       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3485b3cba9beSMel Chen       Builder.SetInsertPoint(VectorPH->getTerminator());
3486b3cba9beSMel Chen       StartV = Iden = Builder.CreateVectorSplat(State.VF, Iden);
3487b3cba9beSMel Chen     }
348803975b7fSFlorian Hahn   } else {
34893d9abfc9SPhilip Reames     Iden = llvm::getRecurrenceIdentity(RK, VecTy->getScalarType(),
349003975b7fSFlorian Hahn                                        RdxDesc.getFastMathFlags());
349103975b7fSFlorian Hahn 
349203975b7fSFlorian Hahn     if (!ScalarPHI) {
34938ec40675SFlorian Hahn       if (CurrentPart == 0) {
34948ec40675SFlorian Hahn         // Create start and identity vector values for the reduction in the
34958ec40675SFlorian Hahn         // preheader.
34968ec40675SFlorian Hahn         // TODO: Introduce recipes in VPlan preheader to create initial values.
3497795e35a6SSam Tebbs         Iden = Builder.CreateVectorSplat(VF, Iden);
349803975b7fSFlorian Hahn         IRBuilderBase::InsertPointGuard IPBuilder(Builder);
349903975b7fSFlorian Hahn         Builder.SetInsertPoint(VectorPH->getTerminator());
350003975b7fSFlorian Hahn         Constant *Zero = Builder.getInt32(0);
350103975b7fSFlorian Hahn         StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
35028ec40675SFlorian Hahn       } else {
3503795e35a6SSam Tebbs         Iden = Builder.CreateVectorSplat(VF, Iden);
35048ec40675SFlorian Hahn       }
350503975b7fSFlorian Hahn     }
350603975b7fSFlorian Hahn   }
350703975b7fSFlorian Hahn 
350857f5d8f2SFlorian Hahn   Phi = cast<PHINode>(State.get(this, IsInLoop));
35098ec40675SFlorian Hahn   Value *StartVal = (CurrentPart == 0) ? StartV : Iden;
351006c3a7d2SFlorian Hahn   Phi->addIncoming(StartVal, VectorPH);
351103975b7fSFlorian Hahn }
351203975b7fSFlorian Hahn 
351303975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
351403975b7fSFlorian Hahn void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
351503975b7fSFlorian Hahn                                  VPSlotTracker &SlotTracker) const {
351603975b7fSFlorian Hahn   O << Indent << "WIDEN-REDUCTION-PHI ";
351703975b7fSFlorian Hahn 
351803975b7fSFlorian Hahn   printAsOperand(O, SlotTracker);
351903975b7fSFlorian Hahn   O << " = phi ";
352003975b7fSFlorian Hahn   printOperands(O, SlotTracker);
3521795e35a6SSam Tebbs   if (VFScaleFactor != 1)
3522795e35a6SSam Tebbs     O << " (VF scaled by 1/" << VFScaleFactor << ")";
352303975b7fSFlorian Hahn }
352403975b7fSFlorian Hahn #endif
352503975b7fSFlorian Hahn 
352603975b7fSFlorian Hahn void VPWidenPHIRecipe::execute(VPTransformState &State) {
352703975b7fSFlorian Hahn   assert(EnableVPlanNativePath &&
352803975b7fSFlorian Hahn          "Non-native vplans are not expected to have VPWidenPHIRecipes.");
352903975b7fSFlorian Hahn 
3530aff1242bSElvis Wang   State.setDebugLocFrom(getDebugLoc());
353157f5d8f2SFlorian Hahn   Value *Op0 = State.get(getOperand(0));
353203975b7fSFlorian Hahn   Type *VecTy = Op0->getType();
353303975b7fSFlorian Hahn   Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
353457f5d8f2SFlorian Hahn   State.set(this, VecPhi);
353503975b7fSFlorian Hahn }
353603975b7fSFlorian Hahn 
353703975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
353803975b7fSFlorian Hahn void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
353903975b7fSFlorian Hahn                              VPSlotTracker &SlotTracker) const {
354003975b7fSFlorian Hahn   O << Indent << "WIDEN-PHI ";
354103975b7fSFlorian Hahn 
354203975b7fSFlorian Hahn   auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
354303975b7fSFlorian Hahn   // Unless all incoming values are modeled in VPlan  print the original PHI
354403975b7fSFlorian Hahn   // directly.
354503975b7fSFlorian Hahn   // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
354603975b7fSFlorian Hahn   // values as VPValues.
354703975b7fSFlorian Hahn   if (getNumOperands() != OriginalPhi->getNumOperands()) {
354803975b7fSFlorian Hahn     O << VPlanIngredient(OriginalPhi);
354903975b7fSFlorian Hahn     return;
355003975b7fSFlorian Hahn   }
355103975b7fSFlorian Hahn 
355203975b7fSFlorian Hahn   printAsOperand(O, SlotTracker);
355303975b7fSFlorian Hahn   O << " = phi ";
355403975b7fSFlorian Hahn   printOperands(O, SlotTracker);
355503975b7fSFlorian Hahn }
355603975b7fSFlorian Hahn #endif
355703fee671SDavid Sherwood 
355803fee671SDavid Sherwood // TODO: It would be good to use the existing VPWidenPHIRecipe instead and
355903fee671SDavid Sherwood // remove VPActiveLaneMaskPHIRecipe.
356003fee671SDavid Sherwood void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
356103fee671SDavid Sherwood   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
356257f5d8f2SFlorian Hahn   Value *StartMask = State.get(getOperand(0));
356306c3a7d2SFlorian Hahn   PHINode *Phi =
356403fee671SDavid Sherwood       State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
356506c3a7d2SFlorian Hahn   Phi->addIncoming(StartMask, VectorPH);
356606c3a7d2SFlorian Hahn   Phi->setDebugLoc(getDebugLoc());
356757f5d8f2SFlorian Hahn   State.set(this, Phi);
356803fee671SDavid Sherwood }
356903fee671SDavid Sherwood 
357003fee671SDavid Sherwood #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
357103fee671SDavid Sherwood void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
357203fee671SDavid Sherwood                                       VPSlotTracker &SlotTracker) const {
357303fee671SDavid Sherwood   O << Indent << "ACTIVE-LANE-MASK-PHI ";
357403fee671SDavid Sherwood 
357503fee671SDavid Sherwood   printAsOperand(O, SlotTracker);
357603fee671SDavid Sherwood   O << " = phi ";
357703fee671SDavid Sherwood   printOperands(O, SlotTracker);
357803fee671SDavid Sherwood }
357903fee671SDavid Sherwood #endif
3580413a66f3SAlexey Bataev 
3581a7fda0e1SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3582a7fda0e1SFlorian Hahn void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
3583a7fda0e1SFlorian Hahn                                   VPSlotTracker &SlotTracker) const {
3584a7fda0e1SFlorian Hahn   O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
3585a7fda0e1SFlorian Hahn 
3586a7fda0e1SFlorian Hahn   printAsOperand(O, SlotTracker);
3587a7fda0e1SFlorian Hahn   O << " = phi ";
3588a7fda0e1SFlorian Hahn   printOperands(O, SlotTracker);
3589a7fda0e1SFlorian Hahn }
3590a7fda0e1SFlorian Hahn #endif
3591a7fda0e1SFlorian Hahn 
3592a7fda0e1SFlorian Hahn void VPScalarPHIRecipe::execute(VPTransformState &State) {
3593413a66f3SAlexey Bataev   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3594afef545eSFlorian Hahn   Value *Start = State.get(getStartValue(), VPLane(0));
3595a7fda0e1SFlorian Hahn   PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name);
359606c3a7d2SFlorian Hahn   Phi->addIncoming(Start, VectorPH);
359706c3a7d2SFlorian Hahn   Phi->setDebugLoc(getDebugLoc());
359857f5d8f2SFlorian Hahn   State.set(this, Phi, /*IsScalar=*/true);
3599413a66f3SAlexey Bataev }
3600413a66f3SAlexey Bataev 
3601413a66f3SAlexey Bataev #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3602a7fda0e1SFlorian Hahn void VPScalarPHIRecipe::print(raw_ostream &O, const Twine &Indent,
3603413a66f3SAlexey Bataev                               VPSlotTracker &SlotTracker) const {
3604a7fda0e1SFlorian Hahn   O << Indent << "SCALAR-PHI ";
3605413a66f3SAlexey Bataev   printAsOperand(O, SlotTracker);
3606413a66f3SAlexey Bataev   O << " = phi ";
3607413a66f3SAlexey Bataev   printOperands(O, SlotTracker);
3608413a66f3SAlexey Bataev }
3609413a66f3SAlexey Bataev #endif
3610