103975b7fSFlorian Hahn //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===// 203975b7fSFlorian Hahn // 303975b7fSFlorian Hahn // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 403975b7fSFlorian Hahn // See https://llvm.org/LICENSE.txt for license information. 503975b7fSFlorian Hahn // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 603975b7fSFlorian Hahn // 703975b7fSFlorian Hahn //===----------------------------------------------------------------------===// 803975b7fSFlorian Hahn /// 903975b7fSFlorian Hahn /// \file 1003975b7fSFlorian Hahn /// This file contains implementations for different VPlan recipes. 1103975b7fSFlorian Hahn /// 1203975b7fSFlorian Hahn //===----------------------------------------------------------------------===// 1303975b7fSFlorian Hahn 1409a29fccSFlorian Hahn #include "LoopVectorizationPlanner.h" 1503975b7fSFlorian Hahn #include "VPlan.h" 16b0b88643SFlorian Hahn #include "VPlanAnalysis.h" 171d9b3222SFlorian Hahn #include "VPlanPatternMatch.h" 1871ede8d8SRamkumar Ramachandra #include "VPlanUtils.h" 1903975b7fSFlorian Hahn #include "llvm/ADT/STLExtras.h" 2003975b7fSFlorian Hahn #include "llvm/ADT/SmallVector.h" 2103975b7fSFlorian Hahn #include "llvm/ADT/Twine.h" 2203975b7fSFlorian Hahn #include "llvm/Analysis/IVDescriptors.h" 2303975b7fSFlorian Hahn #include "llvm/IR/BasicBlock.h" 2403975b7fSFlorian Hahn #include "llvm/IR/IRBuilder.h" 2503975b7fSFlorian Hahn #include "llvm/IR/Instruction.h" 2603975b7fSFlorian Hahn #include "llvm/IR/Instructions.h" 276f1a8c2dSGraham Hunter #include "llvm/IR/Intrinsics.h" 2803975b7fSFlorian Hahn #include "llvm/IR/Type.h" 2903975b7fSFlorian Hahn #include "llvm/IR/Value.h" 3000e40c9bSKolya Panchenko #include "llvm/IR/VectorBuilder.h" 3103975b7fSFlorian Hahn #include "llvm/Support/Casting.h" 3203975b7fSFlorian Hahn #include "llvm/Support/CommandLine.h" 3303975b7fSFlorian Hahn #include "llvm/Support/Debug.h" 3403975b7fSFlorian Hahn #include "llvm/Support/raw_ostream.h" 35225e3ec6SFlorian Hahn #include "llvm/Transforms/Utils/BasicBlockUtils.h" 36241fe837SFlorian Hahn #include "llvm/Transforms/Utils/LoopUtils.h" 3703975b7fSFlorian Hahn #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 3803975b7fSFlorian Hahn #include <cassert> 3903975b7fSFlorian Hahn 4003975b7fSFlorian Hahn using namespace llvm; 4103975b7fSFlorian Hahn 425d135041SFlorian Hahn using VectorParts = SmallVector<Value *, 2>; 435d135041SFlorian Hahn 441e692113SFangrui Song namespace llvm { 4503975b7fSFlorian Hahn extern cl::opt<bool> EnableVPlanNativePath; 461e692113SFangrui Song } 47b841e2ecSFlorian Hahn extern cl::opt<unsigned> ForceTargetInstructionCost; 4803975b7fSFlorian Hahn 4913ae2134SFlorian Hahn #define LV_NAME "loop-vectorize" 5013ae2134SFlorian Hahn #define DEBUG_TYPE LV_NAME 5113ae2134SFlorian Hahn 5203975b7fSFlorian Hahn bool VPRecipeBase::mayWriteToMemory() const { 5303975b7fSFlorian Hahn switch (getVPDefID()) { 5468ed1728SFlorian Hahn case VPInstructionSC: 55f0d5104cSLuke Lau return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory(); 563d422a98SShih-Po Hung case VPInterleaveSC: 573d422a98SShih-Po Hung return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0; 58e2a72fa5SFlorian Hahn case VPWidenStoreEVLSC: 59a9bafe91SFlorian Hahn case VPWidenStoreSC: 60a9bafe91SFlorian Hahn return true; 6103975b7fSFlorian Hahn case VPReplicateSC: 6203975b7fSFlorian Hahn return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) 6303975b7fSFlorian Hahn ->mayWriteToMemory(); 64e846778eSFlorian Hahn case VPWidenCallSC: 65e846778eSFlorian Hahn return !cast<VPWidenCallRecipe>(this) 66e846778eSFlorian Hahn ->getCalledScalarFunction() 67e846778eSFlorian Hahn ->onlyReadsMemory(); 68a4819bd4SFlorian Hahn case VPWidenIntrinsicSC: 696fbbe152SFlorian Hahn return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory(); 7003975b7fSFlorian Hahn case VPBranchOnMaskSC: 713c5f0734SFlorian Hahn case VPScalarIVStepsSC: 725368536cSFlorian Hahn case VPPredInstPHISC: 7303975b7fSFlorian Hahn return false; 7403975b7fSFlorian Hahn case VPBlendSC: 754eb30cfbSMel Chen case VPReductionEVLSC: 7603975b7fSFlorian Hahn case VPReductionSC: 773860e29eSFlorian Hahn case VPVectorPointerSC: 781b05e749SFlorian Hahn case VPWidenCanonicalIVSC: 79e3afe0b8SFlorian Hahn case VPWidenCastSC: 801b05e749SFlorian Hahn case VPWidenGEPSC: 811b05e749SFlorian Hahn case VPWidenIntOrFpInductionSC: 82e2a72fa5SFlorian Hahn case VPWidenLoadEVLSC: 83a9bafe91SFlorian Hahn case VPWidenLoadSC: 841b05e749SFlorian Hahn case VPWidenPHISC: 851b05e749SFlorian Hahn case VPWidenSC: 8600e40c9bSKolya Panchenko case VPWidenEVLSC: 8703975b7fSFlorian Hahn case VPWidenSelectSC: { 8803975b7fSFlorian Hahn const Instruction *I = 8903975b7fSFlorian Hahn dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 9003975b7fSFlorian Hahn (void)I; 9103975b7fSFlorian Hahn assert((!I || !I->mayWriteToMemory()) && 9203975b7fSFlorian Hahn "underlying instruction may write to memory"); 9303975b7fSFlorian Hahn return false; 9403975b7fSFlorian Hahn } 9503975b7fSFlorian Hahn default: 9603975b7fSFlorian Hahn return true; 9703975b7fSFlorian Hahn } 9803975b7fSFlorian Hahn } 9903975b7fSFlorian Hahn 10003975b7fSFlorian Hahn bool VPRecipeBase::mayReadFromMemory() const { 10103975b7fSFlorian Hahn switch (getVPDefID()) { 102f0d5104cSLuke Lau case VPInstructionSC: 103f0d5104cSLuke Lau return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory(); 104e2a72fa5SFlorian Hahn case VPWidenLoadEVLSC: 105a9bafe91SFlorian Hahn case VPWidenLoadSC: 106a9bafe91SFlorian Hahn return true; 10703975b7fSFlorian Hahn case VPReplicateSC: 10803975b7fSFlorian Hahn return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) 10903975b7fSFlorian Hahn ->mayReadFromMemory(); 110e846778eSFlorian Hahn case VPWidenCallSC: 111e846778eSFlorian Hahn return !cast<VPWidenCallRecipe>(this) 112e846778eSFlorian Hahn ->getCalledScalarFunction() 113e846778eSFlorian Hahn ->onlyWritesMemory(); 1146fbbe152SFlorian Hahn case VPWidenIntrinsicSC: 1156fbbe152SFlorian Hahn return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory(); 11603975b7fSFlorian Hahn case VPBranchOnMaskSC: 117cf2d436bSFlorian Hahn case VPPredInstPHISC: 118a9bafe91SFlorian Hahn case VPScalarIVStepsSC: 119e2a72fa5SFlorian Hahn case VPWidenStoreEVLSC: 120a9bafe91SFlorian Hahn case VPWidenStoreSC: 12103975b7fSFlorian Hahn return false; 12203975b7fSFlorian Hahn case VPBlendSC: 1234eb30cfbSMel Chen case VPReductionEVLSC: 12403975b7fSFlorian Hahn case VPReductionSC: 1253860e29eSFlorian Hahn case VPVectorPointerSC: 1261b05e749SFlorian Hahn case VPWidenCanonicalIVSC: 127e3afe0b8SFlorian Hahn case VPWidenCastSC: 1281b05e749SFlorian Hahn case VPWidenGEPSC: 1291b05e749SFlorian Hahn case VPWidenIntOrFpInductionSC: 1301b05e749SFlorian Hahn case VPWidenPHISC: 1311b05e749SFlorian Hahn case VPWidenSC: 13200e40c9bSKolya Panchenko case VPWidenEVLSC: 13303975b7fSFlorian Hahn case VPWidenSelectSC: { 13403975b7fSFlorian Hahn const Instruction *I = 13503975b7fSFlorian Hahn dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 13603975b7fSFlorian Hahn (void)I; 13703975b7fSFlorian Hahn assert((!I || !I->mayReadFromMemory()) && 13803975b7fSFlorian Hahn "underlying instruction may read from memory"); 13903975b7fSFlorian Hahn return false; 14003975b7fSFlorian Hahn } 14103975b7fSFlorian Hahn default: 14203975b7fSFlorian Hahn return true; 14303975b7fSFlorian Hahn } 14403975b7fSFlorian Hahn } 14503975b7fSFlorian Hahn 14603975b7fSFlorian Hahn bool VPRecipeBase::mayHaveSideEffects() const { 14703975b7fSFlorian Hahn switch (getVPDefID()) { 1480c5df7cdSFlorian Hahn case VPDerivedIVSC: 14916e0620dSFlorian Hahn case VPPredInstPHISC: 1500ab539fdSFlorian Hahn case VPScalarCastSC: 151266ff98cSShih-Po Hung case VPReverseVectorPointerSC: 15216e0620dSFlorian Hahn return false; 15334d25924SFlorian Hahn case VPInstructionSC: 15468ed1728SFlorian Hahn return mayWriteToMemory(); 155e846778eSFlorian Hahn case VPWidenCallSC: { 156e846778eSFlorian Hahn Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction(); 157e846778eSFlorian Hahn return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn(); 158e846778eSFlorian Hahn } 1596fbbe152SFlorian Hahn case VPWidenIntrinsicSC: 1606fbbe152SFlorian Hahn return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects(); 16103975b7fSFlorian Hahn case VPBlendSC: 1624eb30cfbSMel Chen case VPReductionEVLSC: 16303975b7fSFlorian Hahn case VPReductionSC: 1641b05e749SFlorian Hahn case VPScalarIVStepsSC: 1653860e29eSFlorian Hahn case VPVectorPointerSC: 1661b05e749SFlorian Hahn case VPWidenCanonicalIVSC: 167e3afe0b8SFlorian Hahn case VPWidenCastSC: 1681b05e749SFlorian Hahn case VPWidenGEPSC: 1691b05e749SFlorian Hahn case VPWidenIntOrFpInductionSC: 1701b05e749SFlorian Hahn case VPWidenPHISC: 1711b05e749SFlorian Hahn case VPWidenPointerInductionSC: 1721b05e749SFlorian Hahn case VPWidenSC: 17300e40c9bSKolya Panchenko case VPWidenEVLSC: 1741b05e749SFlorian Hahn case VPWidenSelectSC: { 17503975b7fSFlorian Hahn const Instruction *I = 17603975b7fSFlorian Hahn dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 17703975b7fSFlorian Hahn (void)I; 17803975b7fSFlorian Hahn assert((!I || !I->mayHaveSideEffects()) && 17903975b7fSFlorian Hahn "underlying instruction has side-effects"); 18003975b7fSFlorian Hahn return false; 18103975b7fSFlorian Hahn } 1823d422a98SShih-Po Hung case VPInterleaveSC: 1833d422a98SShih-Po Hung return mayWriteToMemory(); 184e2a72fa5SFlorian Hahn case VPWidenLoadEVLSC: 185a9bafe91SFlorian Hahn case VPWidenLoadSC: 186e2a72fa5SFlorian Hahn case VPWidenStoreEVLSC: 187a9bafe91SFlorian Hahn case VPWidenStoreSC: 188a9bafe91SFlorian Hahn assert( 189a9bafe91SFlorian Hahn cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() == 190a9bafe91SFlorian Hahn mayWriteToMemory() && 19132efff59SFlorian Hahn "mayHaveSideffects result for ingredient differs from this " 19232efff59SFlorian Hahn "implementation"); 19332efff59SFlorian Hahn return mayWriteToMemory(); 19403975b7fSFlorian Hahn case VPReplicateSC: { 19503975b7fSFlorian Hahn auto *R = cast<VPReplicateRecipe>(this); 19603975b7fSFlorian Hahn return R->getUnderlyingInstr()->mayHaveSideEffects(); 19703975b7fSFlorian Hahn } 19803975b7fSFlorian Hahn default: 19903975b7fSFlorian Hahn return true; 20003975b7fSFlorian Hahn } 20103975b7fSFlorian Hahn } 20203975b7fSFlorian Hahn 20303975b7fSFlorian Hahn void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { 20403975b7fSFlorian Hahn assert(!Parent && "Recipe already in some VPBasicBlock"); 20503975b7fSFlorian Hahn assert(InsertPos->getParent() && 20603975b7fSFlorian Hahn "Insertion position not in any VPBasicBlock"); 2079277a323SFlorian Hahn InsertPos->getParent()->insert(this, InsertPos->getIterator()); 20803975b7fSFlorian Hahn } 20903975b7fSFlorian Hahn 21003975b7fSFlorian Hahn void VPRecipeBase::insertBefore(VPBasicBlock &BB, 21103975b7fSFlorian Hahn iplist<VPRecipeBase>::iterator I) { 21203975b7fSFlorian Hahn assert(!Parent && "Recipe already in some VPBasicBlock"); 21303975b7fSFlorian Hahn assert(I == BB.end() || I->getParent() == &BB); 2149277a323SFlorian Hahn BB.insert(this, I); 21503975b7fSFlorian Hahn } 21603975b7fSFlorian Hahn 21703975b7fSFlorian Hahn void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) { 21803975b7fSFlorian Hahn assert(!Parent && "Recipe already in some VPBasicBlock"); 21903975b7fSFlorian Hahn assert(InsertPos->getParent() && 22003975b7fSFlorian Hahn "Insertion position not in any VPBasicBlock"); 2219277a323SFlorian Hahn InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator())); 22203975b7fSFlorian Hahn } 22303975b7fSFlorian Hahn 22403975b7fSFlorian Hahn void VPRecipeBase::removeFromParent() { 22503975b7fSFlorian Hahn assert(getParent() && "Recipe not in any VPBasicBlock"); 22603975b7fSFlorian Hahn getParent()->getRecipeList().remove(getIterator()); 22703975b7fSFlorian Hahn Parent = nullptr; 22803975b7fSFlorian Hahn } 22903975b7fSFlorian Hahn 23003975b7fSFlorian Hahn iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() { 23103975b7fSFlorian Hahn assert(getParent() && "Recipe not in any VPBasicBlock"); 23203975b7fSFlorian Hahn return getParent()->getRecipeList().erase(getIterator()); 23303975b7fSFlorian Hahn } 23403975b7fSFlorian Hahn 23503975b7fSFlorian Hahn void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { 23603975b7fSFlorian Hahn removeFromParent(); 23703975b7fSFlorian Hahn insertAfter(InsertPos); 23803975b7fSFlorian Hahn } 23903975b7fSFlorian Hahn 24003975b7fSFlorian Hahn void VPRecipeBase::moveBefore(VPBasicBlock &BB, 24103975b7fSFlorian Hahn iplist<VPRecipeBase>::iterator I) { 24203975b7fSFlorian Hahn removeFromParent(); 24303975b7fSFlorian Hahn insertBefore(BB, I); 24403975b7fSFlorian Hahn } 24503975b7fSFlorian Hahn 246b841e2ecSFlorian Hahn InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) { 247fa3258ecSFlorian Hahn // Get the underlying instruction for the recipe, if there is one. It is used 248fa3258ecSFlorian Hahn // to 249fa3258ecSFlorian Hahn // * decide if cost computation should be skipped for this recipe, 250fa3258ecSFlorian Hahn // * apply forced target instruction cost. 251fa3258ecSFlorian Hahn Instruction *UI = nullptr; 252fa3258ecSFlorian Hahn if (auto *S = dyn_cast<VPSingleDefRecipe>(this)) 253fa3258ecSFlorian Hahn UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue()); 254fa3258ecSFlorian Hahn else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this)) 255fa3258ecSFlorian Hahn UI = IG->getInsertPos(); 256fa3258ecSFlorian Hahn else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this)) 257fa3258ecSFlorian Hahn UI = &WidenMem->getIngredient(); 258b841e2ecSFlorian Hahn 259fa3258ecSFlorian Hahn InstructionCost RecipeCost; 260fa3258ecSFlorian Hahn if (UI && Ctx.skipCostComputation(UI, VF.isVector())) { 261fa3258ecSFlorian Hahn RecipeCost = 0; 262fa3258ecSFlorian Hahn } else { 263fa3258ecSFlorian Hahn RecipeCost = computeCost(VF, Ctx); 264bb60dd39SFlorian Hahn if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 && 265b841e2ecSFlorian Hahn RecipeCost.isValid()) 266b841e2ecSFlorian Hahn RecipeCost = InstructionCost(ForceTargetInstructionCost); 267fa3258ecSFlorian Hahn } 268b841e2ecSFlorian Hahn 269b841e2ecSFlorian Hahn LLVM_DEBUG({ 270b841e2ecSFlorian Hahn dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": "; 271b841e2ecSFlorian Hahn dump(); 272b841e2ecSFlorian Hahn }); 273b841e2ecSFlorian Hahn return RecipeCost; 274b841e2ecSFlorian Hahn } 275b841e2ecSFlorian Hahn 276b841e2ecSFlorian Hahn InstructionCost VPRecipeBase::computeCost(ElementCount VF, 277b841e2ecSFlorian Hahn VPCostContext &Ctx) const { 278fa3258ecSFlorian Hahn llvm_unreachable("subclasses should implement computeCost"); 279fa3258ecSFlorian Hahn } 280fa3258ecSFlorian Hahn 281795e35a6SSam Tebbs InstructionCost 282795e35a6SSam Tebbs VPPartialReductionRecipe::computeCost(ElementCount VF, 283795e35a6SSam Tebbs VPCostContext &Ctx) const { 284795e35a6SSam Tebbs std::optional<unsigned> Opcode = std::nullopt; 285795e35a6SSam Tebbs VPRecipeBase *BinOpR = getOperand(0)->getDefiningRecipe(); 286795e35a6SSam Tebbs if (auto *WidenR = dyn_cast<VPWidenRecipe>(BinOpR)) 287795e35a6SSam Tebbs Opcode = std::make_optional(WidenR->getOpcode()); 288795e35a6SSam Tebbs 289795e35a6SSam Tebbs VPRecipeBase *ExtAR = BinOpR->getOperand(0)->getDefiningRecipe(); 290795e35a6SSam Tebbs VPRecipeBase *ExtBR = BinOpR->getOperand(1)->getDefiningRecipe(); 291795e35a6SSam Tebbs 292795e35a6SSam Tebbs auto *PhiType = Ctx.Types.inferScalarType(getOperand(1)); 293795e35a6SSam Tebbs auto *InputTypeA = Ctx.Types.inferScalarType(ExtAR ? ExtAR->getOperand(0) 294795e35a6SSam Tebbs : BinOpR->getOperand(0)); 295795e35a6SSam Tebbs auto *InputTypeB = Ctx.Types.inferScalarType(ExtBR ? ExtBR->getOperand(0) 296795e35a6SSam Tebbs : BinOpR->getOperand(1)); 297795e35a6SSam Tebbs 298795e35a6SSam Tebbs auto GetExtendKind = [](VPRecipeBase *R) { 299795e35a6SSam Tebbs // The extend could come from outside the plan. 300795e35a6SSam Tebbs if (!R) 301795e35a6SSam Tebbs return TargetTransformInfo::PR_None; 302795e35a6SSam Tebbs auto *WidenCastR = dyn_cast<VPWidenCastRecipe>(R); 303795e35a6SSam Tebbs if (!WidenCastR) 304795e35a6SSam Tebbs return TargetTransformInfo::PR_None; 305795e35a6SSam Tebbs if (WidenCastR->getOpcode() == Instruction::CastOps::ZExt) 306795e35a6SSam Tebbs return TargetTransformInfo::PR_ZeroExtend; 307795e35a6SSam Tebbs if (WidenCastR->getOpcode() == Instruction::CastOps::SExt) 308795e35a6SSam Tebbs return TargetTransformInfo::PR_SignExtend; 309795e35a6SSam Tebbs return TargetTransformInfo::PR_None; 310795e35a6SSam Tebbs }; 311795e35a6SSam Tebbs 312795e35a6SSam Tebbs return Ctx.TTI.getPartialReductionCost(getOpcode(), InputTypeA, InputTypeB, 313795e35a6SSam Tebbs PhiType, VF, GetExtendKind(ExtAR), 314795e35a6SSam Tebbs GetExtendKind(ExtBR), Opcode); 315795e35a6SSam Tebbs } 316795e35a6SSam Tebbs 317795e35a6SSam Tebbs void VPPartialReductionRecipe::execute(VPTransformState &State) { 318795e35a6SSam Tebbs State.setDebugLocFrom(getDebugLoc()); 319795e35a6SSam Tebbs auto &Builder = State.Builder; 320795e35a6SSam Tebbs 321795e35a6SSam Tebbs assert(getOpcode() == Instruction::Add && 322795e35a6SSam Tebbs "Unhandled partial reduction opcode"); 323795e35a6SSam Tebbs 324795e35a6SSam Tebbs Value *BinOpVal = State.get(getOperand(0)); 325795e35a6SSam Tebbs Value *PhiVal = State.get(getOperand(1)); 326795e35a6SSam Tebbs assert(PhiVal && BinOpVal && "Phi and Mul must be set"); 327795e35a6SSam Tebbs 328795e35a6SSam Tebbs Type *RetTy = PhiVal->getType(); 329795e35a6SSam Tebbs 330795e35a6SSam Tebbs CallInst *V = Builder.CreateIntrinsic( 331795e35a6SSam Tebbs RetTy, Intrinsic::experimental_vector_partial_reduce_add, 332795e35a6SSam Tebbs {PhiVal, BinOpVal}, nullptr, "partial.reduce"); 333795e35a6SSam Tebbs 334795e35a6SSam Tebbs State.set(this, V); 335795e35a6SSam Tebbs } 336795e35a6SSam Tebbs 337795e35a6SSam Tebbs #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 338795e35a6SSam Tebbs void VPPartialReductionRecipe::print(raw_ostream &O, const Twine &Indent, 339795e35a6SSam Tebbs VPSlotTracker &SlotTracker) const { 340795e35a6SSam Tebbs O << Indent << "PARTIAL-REDUCE "; 341795e35a6SSam Tebbs printAsOperand(O, SlotTracker); 342795e35a6SSam Tebbs O << " = " << Instruction::getOpcodeName(getOpcode()) << " "; 343795e35a6SSam Tebbs printOperands(O, SlotTracker); 344795e35a6SSam Tebbs } 345795e35a6SSam Tebbs #endif 346795e35a6SSam Tebbs 3470b17e9d2SFlorian Hahn FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const { 3480b17e9d2SFlorian Hahn assert(OpType == OperationType::FPMathOp && 3490b17e9d2SFlorian Hahn "recipe doesn't have fast math flags"); 3500b17e9d2SFlorian Hahn FastMathFlags Res; 3510b17e9d2SFlorian Hahn Res.setAllowReassoc(FMFs.AllowReassoc); 3520b17e9d2SFlorian Hahn Res.setNoNaNs(FMFs.NoNaNs); 3530b17e9d2SFlorian Hahn Res.setNoInfs(FMFs.NoInfs); 3540b17e9d2SFlorian Hahn Res.setNoSignedZeros(FMFs.NoSignedZeros); 3550b17e9d2SFlorian Hahn Res.setAllowReciprocal(FMFs.AllowReciprocal); 3560b17e9d2SFlorian Hahn Res.setAllowContract(FMFs.AllowContract); 3570b17e9d2SFlorian Hahn Res.setApproxFunc(FMFs.ApproxFunc); 3580b17e9d2SFlorian Hahn return Res; 3590b17e9d2SFlorian Hahn } 3600b17e9d2SFlorian Hahn 36181bbe193SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 36281bbe193SFlorian Hahn void VPSingleDefRecipe::dump() const { VPDef::dump(); } 36381bbe193SFlorian Hahn #endif 36481bbe193SFlorian Hahn 3658ec40675SFlorian Hahn template <unsigned PartOpIdx> 3668ec40675SFlorian Hahn VPValue * 3678ec40675SFlorian Hahn VPUnrollPartAccessor<PartOpIdx>::getUnrollPartOperand(VPUser &U) const { 3688ec40675SFlorian Hahn if (U.getNumOperands() == PartOpIdx + 1) 3698ec40675SFlorian Hahn return U.getOperand(PartOpIdx); 3708ec40675SFlorian Hahn return nullptr; 3718ec40675SFlorian Hahn } 3728ec40675SFlorian Hahn 3738ec40675SFlorian Hahn template <unsigned PartOpIdx> 3748ec40675SFlorian Hahn unsigned VPUnrollPartAccessor<PartOpIdx>::getUnrollPart(VPUser &U) const { 3758ec40675SFlorian Hahn if (auto *UnrollPartOp = getUnrollPartOperand(U)) 3768ec40675SFlorian Hahn return cast<ConstantInt>(UnrollPartOp->getLiveInIRValue())->getZExtValue(); 3778ec40675SFlorian Hahn return 0; 3788ec40675SFlorian Hahn } 3798ec40675SFlorian Hahn 380fd661957SFlorian Hahn VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, 381fd661957SFlorian Hahn VPValue *A, VPValue *B, DebugLoc DL, 382fd661957SFlorian Hahn const Twine &Name) 383fd661957SFlorian Hahn : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}), 384165e24aaSFlorian Hahn Pred, DL), 385abdb61f5SFlorian Hahn Opcode(Opcode), Name(Name.str()) { 386fd661957SFlorian Hahn assert(Opcode == Instruction::ICmp && 387fd661957SFlorian Hahn "only ICmp predicates supported at the moment"); 388fd661957SFlorian Hahn } 389fd661957SFlorian Hahn 390698ae660SFlorian Hahn VPInstruction::VPInstruction(unsigned Opcode, 391698ae660SFlorian Hahn std::initializer_list<VPValue *> Operands, 392698ae660SFlorian Hahn FastMathFlags FMFs, DebugLoc DL, const Twine &Name) 393165e24aaSFlorian Hahn : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL), 394abdb61f5SFlorian Hahn Opcode(Opcode), Name(Name.str()) { 395698ae660SFlorian Hahn // Make sure the VPInstruction is a floating-point operation. 396698ae660SFlorian Hahn assert(isFPMathOp() && "this op can't take fast-math flags"); 397698ae660SFlorian Hahn } 398698ae660SFlorian Hahn 39906bb8c9fSFlorian Hahn bool VPInstruction::doesGeneratePerAllLanes() const { 40006bb8c9fSFlorian Hahn return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this); 40106bb8c9fSFlorian Hahn } 40206bb8c9fSFlorian Hahn 40306bb8c9fSFlorian Hahn bool VPInstruction::canGenerateScalarForFirstLane() const { 40406bb8c9fSFlorian Hahn if (Instruction::isBinaryOp(getOpcode())) 40506bb8c9fSFlorian Hahn return true; 4069a5a8731SFlorian Hahn if (isSingleScalar() || isVectorToScalar()) 40707b33013SFlorian Hahn return true; 40806bb8c9fSFlorian Hahn switch (Opcode) { 40999d6c6d9SFlorian Hahn case Instruction::ICmp: 410f148d579SAlexey Bataev case Instruction::Select: 41106bb8c9fSFlorian Hahn case VPInstruction::BranchOnCond: 41206bb8c9fSFlorian Hahn case VPInstruction::BranchOnCount: 41306bb8c9fSFlorian Hahn case VPInstruction::CalculateTripCountMinusVF: 41406bb8c9fSFlorian Hahn case VPInstruction::CanonicalIVIncrementForPart: 41506bb8c9fSFlorian Hahn case VPInstruction::PtrAdd: 416413a66f3SAlexey Bataev case VPInstruction::ExplicitVectorLength: 4175fae408dSFlorian Hahn case VPInstruction::AnyOf: 41806bb8c9fSFlorian Hahn return true; 41906bb8c9fSFlorian Hahn default: 42006bb8c9fSFlorian Hahn return false; 42106bb8c9fSFlorian Hahn } 42206bb8c9fSFlorian Hahn } 42306bb8c9fSFlorian Hahn 42406bb8c9fSFlorian Hahn Value *VPInstruction::generatePerLane(VPTransformState &State, 425aae7ac66SFlorian Hahn const VPLane &Lane) { 42603975b7fSFlorian Hahn IRBuilderBase &Builder = State.Builder; 42706bb8c9fSFlorian Hahn 42806bb8c9fSFlorian Hahn assert(getOpcode() == VPInstruction::PtrAdd && 42906bb8c9fSFlorian Hahn "only PtrAdd opcodes are supported for now"); 43006bb8c9fSFlorian Hahn return Builder.CreatePtrAdd(State.get(getOperand(0), Lane), 43106bb8c9fSFlorian Hahn State.get(getOperand(1), Lane), Name); 43206bb8c9fSFlorian Hahn } 43306bb8c9fSFlorian Hahn 43406c3a7d2SFlorian Hahn Value *VPInstruction::generate(VPTransformState &State) { 43506bb8c9fSFlorian Hahn IRBuilderBase &Builder = State.Builder; 43603975b7fSFlorian Hahn 43703975b7fSFlorian Hahn if (Instruction::isBinaryOp(getOpcode())) { 438911055e3SFlorian Hahn bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); 43957f5d8f2SFlorian Hahn Value *A = State.get(getOperand(0), OnlyFirstLaneUsed); 44057f5d8f2SFlorian Hahn Value *B = State.get(getOperand(1), OnlyFirstLaneUsed); 441a5891fa4SFlorian Hahn auto *Res = 442a5891fa4SFlorian Hahn Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name); 443a5891fa4SFlorian Hahn if (auto *I = dyn_cast<Instruction>(Res)) 444a5891fa4SFlorian Hahn setFlags(I); 445a5891fa4SFlorian Hahn return Res; 44603975b7fSFlorian Hahn } 44703975b7fSFlorian Hahn 44803975b7fSFlorian Hahn switch (getOpcode()) { 44903975b7fSFlorian Hahn case VPInstruction::Not: { 45057f5d8f2SFlorian Hahn Value *A = State.get(getOperand(0)); 4512265bb06SFlorian Hahn return Builder.CreateNot(A, Name); 45203975b7fSFlorian Hahn } 453fd661957SFlorian Hahn case Instruction::ICmp: { 45499d6c6d9SFlorian Hahn bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); 45557f5d8f2SFlorian Hahn Value *A = State.get(getOperand(0), OnlyFirstLaneUsed); 45657f5d8f2SFlorian Hahn Value *B = State.get(getOperand(1), OnlyFirstLaneUsed); 457fd661957SFlorian Hahn return Builder.CreateCmp(getPredicate(), A, B, Name); 45803975b7fSFlorian Hahn } 45903975b7fSFlorian Hahn case Instruction::Select: { 460f148d579SAlexey Bataev bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); 461f148d579SAlexey Bataev Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed); 462f148d579SAlexey Bataev Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed); 463f148d579SAlexey Bataev Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed); 4642265bb06SFlorian Hahn return Builder.CreateSelect(Cond, Op1, Op2, Name); 46503975b7fSFlorian Hahn } 46603975b7fSFlorian Hahn case VPInstruction::ActiveLaneMask: { 46703975b7fSFlorian Hahn // Get first lane of vector induction variable. 468aae7ac66SFlorian Hahn Value *VIVElem0 = State.get(getOperand(0), VPLane(0)); 46903975b7fSFlorian Hahn // Get the original loop tripcount. 470aae7ac66SFlorian Hahn Value *ScalarTC = State.get(getOperand(1), VPLane(0)); 47103975b7fSFlorian Hahn 472012d2171SCameron McInally // If this part of the active lane mask is scalar, generate the CMP directly 473012d2171SCameron McInally // to avoid unnecessary extracts. 474012d2171SCameron McInally if (State.VF.isScalar()) 475012d2171SCameron McInally return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC, 476012d2171SCameron McInally Name); 477012d2171SCameron McInally 47803975b7fSFlorian Hahn auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); 47903975b7fSFlorian Hahn auto *PredTy = VectorType::get(Int1Ty, State.VF); 4802265bb06SFlorian Hahn return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask, 4812265bb06SFlorian Hahn {PredTy, ScalarTC->getType()}, 48202d6950dSDavid Sherwood {VIVElem0, ScalarTC}, nullptr, Name); 48303975b7fSFlorian Hahn } 48403975b7fSFlorian Hahn case VPInstruction::FirstOrderRecurrenceSplice: { 48503975b7fSFlorian Hahn // Generate code to combine the previous and current values in vector v3. 48603975b7fSFlorian Hahn // 48703975b7fSFlorian Hahn // vector.ph: 48803975b7fSFlorian Hahn // v_init = vector(..., ..., ..., a[-1]) 48903975b7fSFlorian Hahn // br vector.body 49003975b7fSFlorian Hahn // 49103975b7fSFlorian Hahn // vector.body 49203975b7fSFlorian Hahn // i = phi [0, vector.ph], [i+4, vector.body] 49303975b7fSFlorian Hahn // v1 = phi [v_init, vector.ph], [v2, vector.body] 49403975b7fSFlorian Hahn // v2 = a[i, i+1, i+2, i+3]; 49503975b7fSFlorian Hahn // v3 = vector(v1(3), v2(0, 1, 2)) 49603975b7fSFlorian Hahn 49757f5d8f2SFlorian Hahn auto *V1 = State.get(getOperand(0)); 49806c3a7d2SFlorian Hahn if (!V1->getType()->isVectorTy()) 49906c3a7d2SFlorian Hahn return V1; 50057f5d8f2SFlorian Hahn Value *V2 = State.get(getOperand(1)); 50106c3a7d2SFlorian Hahn return Builder.CreateVectorSplice(V1, V2, -1, Name); 50203975b7fSFlorian Hahn } 503fe1b51ffSSander de Smalen case VPInstruction::CalculateTripCountMinusVF: { 5048ec40675SFlorian Hahn unsigned UF = getParent()->getPlan()->getUF(); 505aae7ac66SFlorian Hahn Value *ScalarTC = State.get(getOperand(0), VPLane(0)); 5068ec40675SFlorian Hahn Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF); 507fe1b51ffSSander de Smalen Value *Sub = Builder.CreateSub(ScalarTC, Step); 508fe1b51ffSSander de Smalen Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step); 509fe1b51ffSSander de Smalen Value *Zero = ConstantInt::get(ScalarTC->getType(), 0); 5102265bb06SFlorian Hahn return Builder.CreateSelect(Cmp, Sub, Zero); 511fe1b51ffSSander de Smalen } 512413a66f3SAlexey Bataev case VPInstruction::ExplicitVectorLength: { 51360ed2361SAlexey Bataev // TODO: Restructure this code with an explicit remainder loop, vsetvli can 51460ed2361SAlexey Bataev // be outside of the main loop. 515aae7ac66SFlorian Hahn Value *AVL = State.get(getOperand(0), /*IsScalar*/ true); 516413a66f3SAlexey Bataev // Compute EVL 517413a66f3SAlexey Bataev assert(AVL->getType()->isIntegerTy() && 518413a66f3SAlexey Bataev "Requested vector length should be an integer."); 519413a66f3SAlexey Bataev 520413a66f3SAlexey Bataev assert(State.VF.isScalable() && "Expected scalable vector factor."); 521413a66f3SAlexey Bataev Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue()); 522413a66f3SAlexey Bataev 523413a66f3SAlexey Bataev Value *EVL = State.Builder.CreateIntrinsic( 524413a66f3SAlexey Bataev State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length, 525413a66f3SAlexey Bataev {AVL, VFArg, State.Builder.getTrue()}); 526413a66f3SAlexey Bataev return EVL; 527413a66f3SAlexey Bataev } 528af635a55SFlorian Hahn case VPInstruction::CanonicalIVIncrementForPart: { 5298ec40675SFlorian Hahn unsigned Part = getUnrollPart(*this); 530aae7ac66SFlorian Hahn auto *IV = State.get(getOperand(0), VPLane(0)); 5318ec40675SFlorian Hahn assert(Part != 0 && "Must have a positive part"); 5328ec40675SFlorian Hahn // The canonical IV is incremented by the vectorization factor (num of 5338ec40675SFlorian Hahn // SIMD elements) times the unroll part. 53403fee671SDavid Sherwood Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part); 535b6d994deSFlorian Hahn return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(), 536b6d994deSFlorian Hahn hasNoSignedWrap()); 53703fee671SDavid Sherwood } 53803975b7fSFlorian Hahn case VPInstruction::BranchOnCond: { 539aae7ac66SFlorian Hahn Value *Cond = State.get(getOperand(0), VPLane(0)); 54003975b7fSFlorian Hahn // Replace the temporary unreachable terminator with a new conditional 54103975b7fSFlorian Hahn // branch, hooking it up to backward destination for exiting blocks now and 54203975b7fSFlorian Hahn // to forward destination(s) later when they are created. 54303975b7fSFlorian Hahn BranchInst *CondBr = 54403975b7fSFlorian Hahn Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr); 54503975b7fSFlorian Hahn CondBr->setSuccessor(0, nullptr); 54603975b7fSFlorian Hahn Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); 547ab9c2b1cSFlorian Hahn 548ab9c2b1cSFlorian Hahn if (!getParent()->isExiting()) 549ab9c2b1cSFlorian Hahn return CondBr; 550ab9c2b1cSFlorian Hahn 551ab9c2b1cSFlorian Hahn VPRegionBlock *ParentRegion = getParent()->getParent(); 552ab9c2b1cSFlorian Hahn VPBasicBlock *Header = ParentRegion->getEntryBasicBlock(); 553ab9c2b1cSFlorian Hahn CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]); 5542265bb06SFlorian Hahn return CondBr; 55503975b7fSFlorian Hahn } 55603975b7fSFlorian Hahn case VPInstruction::BranchOnCount: { 55703975b7fSFlorian Hahn // First create the compare. 55857f5d8f2SFlorian Hahn Value *IV = State.get(getOperand(0), /*IsScalar*/ true); 55957f5d8f2SFlorian Hahn Value *TC = State.get(getOperand(1), /*IsScalar*/ true); 56003975b7fSFlorian Hahn Value *Cond = Builder.CreateICmpEQ(IV, TC); 56103975b7fSFlorian Hahn 56203975b7fSFlorian Hahn // Now create the branch. 56303975b7fSFlorian Hahn auto *Plan = getParent()->getPlan(); 56403975b7fSFlorian Hahn VPRegionBlock *TopRegion = Plan->getVectorLoopRegion(); 56503975b7fSFlorian Hahn VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock(); 56603975b7fSFlorian Hahn 56703975b7fSFlorian Hahn // Replace the temporary unreachable terminator with a new conditional 56803975b7fSFlorian Hahn // branch, hooking it up to backward destination (the header) now and to the 56903975b7fSFlorian Hahn // forward destination (the exit/middle block) later when it is created. 57003975b7fSFlorian Hahn // Note that CreateCondBr expects a valid BB as first argument, so we need 57103975b7fSFlorian Hahn // to set it to nullptr later. 57203975b7fSFlorian Hahn BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), 57303975b7fSFlorian Hahn State.CFG.VPBB2IRBB[Header]); 57403975b7fSFlorian Hahn CondBr->setSuccessor(0, nullptr); 57503975b7fSFlorian Hahn Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); 5762265bb06SFlorian Hahn return CondBr; 57703975b7fSFlorian Hahn } 578241fe837SFlorian Hahn case VPInstruction::ComputeReductionResult: { 579241fe837SFlorian Hahn // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary 580241fe837SFlorian Hahn // and will be removed by breaking up the recipe further. 581241fe837SFlorian Hahn auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0)); 582241fe837SFlorian Hahn auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue()); 583241fe837SFlorian Hahn // Get its reduction variable descriptor. 584241fe837SFlorian Hahn const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); 585241fe837SFlorian Hahn 586241fe837SFlorian Hahn RecurKind RK = RdxDesc.getRecurrenceKind(); 587241fe837SFlorian Hahn 588241fe837SFlorian Hahn Type *PhiTy = OrigPhi->getType(); 5898ec40675SFlorian Hahn // The recipe's operands are the reduction phi, followed by one operand for 5908ec40675SFlorian Hahn // each part of the reduction. 5918ec40675SFlorian Hahn unsigned UF = getNumOperands() - 1; 5928ec40675SFlorian Hahn VectorParts RdxParts(UF); 5938ec40675SFlorian Hahn for (unsigned Part = 0; Part < UF; ++Part) 59457f5d8f2SFlorian Hahn RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop()); 595241fe837SFlorian Hahn 596241fe837SFlorian Hahn // If the vector reduction can be performed in a smaller type, we truncate 597241fe837SFlorian Hahn // then extend the loop exit value to enable InstCombine to evaluate the 598241fe837SFlorian Hahn // entire expression in the smaller type. 599241fe837SFlorian Hahn // TODO: Handle this in truncateToMinBW. 600241fe837SFlorian Hahn if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { 601241fe837SFlorian Hahn Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF); 6028ec40675SFlorian Hahn for (unsigned Part = 0; Part < UF; ++Part) 603241fe837SFlorian Hahn RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); 604241fe837SFlorian Hahn } 605241fe837SFlorian Hahn // Reduce all of the unrolled parts into a single vector. 606241fe837SFlorian Hahn Value *ReducedPartRdx = RdxParts[0]; 6072a0ee090SRamkumar Ramachandra unsigned Op = RdxDesc.getOpcode(); 608bccb7ed8SFlorian Hahn if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) 609bccb7ed8SFlorian Hahn Op = Instruction::Or; 610241fe837SFlorian Hahn 611241fe837SFlorian Hahn if (PhiR->isOrdered()) { 6128ec40675SFlorian Hahn ReducedPartRdx = RdxParts[UF - 1]; 613241fe837SFlorian Hahn } else { 614241fe837SFlorian Hahn // Floating-point operations should have some FMF to enable the reduction. 615241fe837SFlorian Hahn IRBuilderBase::FastMathFlagGuard FMFG(Builder); 616241fe837SFlorian Hahn Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); 6178ec40675SFlorian Hahn for (unsigned Part = 1; Part < UF; ++Part) { 618241fe837SFlorian Hahn Value *RdxPart = RdxParts[Part]; 619241fe837SFlorian Hahn if (Op != Instruction::ICmp && Op != Instruction::FCmp) 620241fe837SFlorian Hahn ReducedPartRdx = Builder.CreateBinOp( 621241fe837SFlorian Hahn (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx"); 622b3cba9beSMel Chen else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) 623b3cba9beSMel Chen ReducedPartRdx = 624b3cba9beSMel Chen createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart); 625bccb7ed8SFlorian Hahn else 626241fe837SFlorian Hahn ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart); 627241fe837SFlorian Hahn } 628241fe837SFlorian Hahn } 629241fe837SFlorian Hahn 630241fe837SFlorian Hahn // Create the reduction after the loop. Note that inloop reductions create 631241fe837SFlorian Hahn // the target reduction in the loop using a Reduction recipe. 632bccb7ed8SFlorian Hahn if ((State.VF.isVector() || 633b3cba9beSMel Chen RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) || 634b3cba9beSMel Chen RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) && 635bccb7ed8SFlorian Hahn !PhiR->isInLoop()) { 636241fe837SFlorian Hahn ReducedPartRdx = 6373e8840baSPhilip Reames createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi); 638241fe837SFlorian Hahn // If the reduction can be performed in a smaller type, we need to extend 639241fe837SFlorian Hahn // the reduction to the wider type before we branch to the original loop. 640241fe837SFlorian Hahn if (PhiTy != RdxDesc.getRecurrenceType()) 641241fe837SFlorian Hahn ReducedPartRdx = RdxDesc.isSigned() 642241fe837SFlorian Hahn ? Builder.CreateSExt(ReducedPartRdx, PhiTy) 643241fe837SFlorian Hahn : Builder.CreateZExt(ReducedPartRdx, PhiTy); 644241fe837SFlorian Hahn } 645241fe837SFlorian Hahn 646241fe837SFlorian Hahn return ReducedPartRdx; 647241fe837SFlorian Hahn } 64807b33013SFlorian Hahn case VPInstruction::ExtractFromEnd: { 64907b33013SFlorian Hahn auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue()); 65007b33013SFlorian Hahn unsigned Offset = CI->getZExtValue(); 65107b33013SFlorian Hahn assert(Offset > 0 && "Offset from end must be positive"); 65207b33013SFlorian Hahn Value *Res; 65307b33013SFlorian Hahn if (State.VF.isVector()) { 65407b33013SFlorian Hahn assert(Offset <= State.VF.getKnownMinValue() && 65507b33013SFlorian Hahn "invalid offset to extract from"); 65607b33013SFlorian Hahn // Extract lane VF - Offset from the operand. 657aae7ac66SFlorian Hahn Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset)); 65807b33013SFlorian Hahn } else { 65906c3a7d2SFlorian Hahn assert(Offset <= 1 && "invalid offset to extract from"); 66057f5d8f2SFlorian Hahn Res = State.get(getOperand(0)); 66107b33013SFlorian Hahn } 66240a72f8cSFlorian Hahn if (isa<ExtractElementInst>(Res)) 66307b33013SFlorian Hahn Res->setName(Name); 66407b33013SFlorian Hahn return Res; 66507b33013SFlorian Hahn } 666632317e9SFlorian Hahn case VPInstruction::LogicalAnd: { 66757f5d8f2SFlorian Hahn Value *A = State.get(getOperand(0)); 66857f5d8f2SFlorian Hahn Value *B = State.get(getOperand(1)); 669632317e9SFlorian Hahn return Builder.CreateLogicalAnd(A, B, Name); 670632317e9SFlorian Hahn } 67106bb8c9fSFlorian Hahn case VPInstruction::PtrAdd: { 67206bb8c9fSFlorian Hahn assert(vputils::onlyFirstLaneUsed(this) && 67306bb8c9fSFlorian Hahn "can only generate first lane for PtrAdd"); 67401cbbc52SFlorian Hahn Value *Ptr = State.get(getOperand(0), VPLane(0)); 67501cbbc52SFlorian Hahn Value *Addend = State.get(getOperand(1), VPLane(0)); 67611571874SNikita Popov return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags()); 67706bb8c9fSFlorian Hahn } 6789a5a8731SFlorian Hahn case VPInstruction::ResumePhi: { 6799a5a8731SFlorian Hahn Value *IncomingFromVPlanPred = 68057f5d8f2SFlorian Hahn State.get(getOperand(0), /* IsScalar */ true); 6819a5a8731SFlorian Hahn Value *IncomingFromOtherPreds = 68257f5d8f2SFlorian Hahn State.get(getOperand(1), /* IsScalar */ true); 6839a5a8731SFlorian Hahn auto *NewPhi = 684bb86c5ddSFlorian Hahn Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name); 6859a5a8731SFlorian Hahn BasicBlock *VPlanPred = 6869a5a8731SFlorian Hahn State.CFG 6876c8f41d3SFlorian Hahn .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getPredecessors()[0])]; 6889a5a8731SFlorian Hahn NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred); 6894ad0fdd1SFlorian Hahn for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) { 6906c8f41d3SFlorian Hahn if (OtherPred == VPlanPred) 6916c8f41d3SFlorian Hahn continue; 6929a5a8731SFlorian Hahn NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred); 6939a5a8731SFlorian Hahn } 6949a5a8731SFlorian Hahn return NewPhi; 6959a5a8731SFlorian Hahn } 6965fae408dSFlorian Hahn case VPInstruction::AnyOf: { 6975fae408dSFlorian Hahn Value *A = State.get(getOperand(0)); 6985fae408dSFlorian Hahn return Builder.CreateOrReduce(A); 6995fae408dSFlorian Hahn } 7009a5a8731SFlorian Hahn 70103975b7fSFlorian Hahn default: 70203975b7fSFlorian Hahn llvm_unreachable("Unsupported opcode for instruction"); 70303975b7fSFlorian Hahn } 70403975b7fSFlorian Hahn } 70503975b7fSFlorian Hahn 70607b33013SFlorian Hahn bool VPInstruction::isVectorToScalar() const { 70707b33013SFlorian Hahn return getOpcode() == VPInstruction::ExtractFromEnd || 7085fae408dSFlorian Hahn getOpcode() == VPInstruction::ComputeReductionResult || 7095fae408dSFlorian Hahn getOpcode() == VPInstruction::AnyOf; 71007b33013SFlorian Hahn } 71107b33013SFlorian Hahn 7129a5a8731SFlorian Hahn bool VPInstruction::isSingleScalar() const { 7139a5a8731SFlorian Hahn return getOpcode() == VPInstruction::ResumePhi; 7149a5a8731SFlorian Hahn } 7159a5a8731SFlorian Hahn 716698ae660SFlorian Hahn #if !defined(NDEBUG) 717698ae660SFlorian Hahn bool VPInstruction::isFPMathOp() const { 718698ae660SFlorian Hahn // Inspired by FPMathOperator::classof. Notable differences are that we don't 719698ae660SFlorian Hahn // support Call, PHI and Select opcodes here yet. 720698ae660SFlorian Hahn return Opcode == Instruction::FAdd || Opcode == Instruction::FMul || 721698ae660SFlorian Hahn Opcode == Instruction::FNeg || Opcode == Instruction::FSub || 722698ae660SFlorian Hahn Opcode == Instruction::FDiv || Opcode == Instruction::FRem || 723f23246a0SFlorian Hahn Opcode == Instruction::FCmp || Opcode == Instruction::Select; 724698ae660SFlorian Hahn } 725698ae660SFlorian Hahn #endif 726698ae660SFlorian Hahn 72703975b7fSFlorian Hahn void VPInstruction::execute(VPTransformState &State) { 728aae7ac66SFlorian Hahn assert(!State.Lane && "VPInstruction executing an Lane"); 72903975b7fSFlorian Hahn IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); 730f23246a0SFlorian Hahn assert((hasFastMathFlags() == isFPMathOp() || 731f23246a0SFlorian Hahn getOpcode() == Instruction::Select) && 732698ae660SFlorian Hahn "Recipe not a FPMathOp but has fast-math flags?"); 733698ae660SFlorian Hahn if (hasFastMathFlags()) 734698ae660SFlorian Hahn State.Builder.setFastMathFlags(getFastMathFlags()); 73516da9d53SFlorian Hahn State.setDebugLocFrom(getDebugLoc()); 7369a5a8731SFlorian Hahn bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() && 7379a5a8731SFlorian Hahn (vputils::onlyFirstLaneUsed(this) || 7389a5a8731SFlorian Hahn isVectorToScalar() || isSingleScalar()); 73906bb8c9fSFlorian Hahn bool GeneratesPerAllLanes = doesGeneratePerAllLanes(); 74006bb8c9fSFlorian Hahn if (GeneratesPerAllLanes) { 74106bb8c9fSFlorian Hahn for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue(); 74206bb8c9fSFlorian Hahn Lane != NumLanes; ++Lane) { 743aae7ac66SFlorian Hahn Value *GeneratedValue = generatePerLane(State, VPLane(Lane)); 74406bb8c9fSFlorian Hahn assert(GeneratedValue && "generatePerLane must produce a value"); 745aae7ac66SFlorian Hahn State.set(this, GeneratedValue, VPLane(Lane)); 74606bb8c9fSFlorian Hahn } 74706c3a7d2SFlorian Hahn return; 74806bb8c9fSFlorian Hahn } 74906bb8c9fSFlorian Hahn 75006c3a7d2SFlorian Hahn Value *GeneratedValue = generate(State); 7512265bb06SFlorian Hahn if (!hasResult()) 75206c3a7d2SFlorian Hahn return; 75306c3a7d2SFlorian Hahn assert(GeneratedValue && "generate must produce a value"); 75406c3a7d2SFlorian Hahn assert( 75506c3a7d2SFlorian Hahn (GeneratedValue->getType()->isVectorTy() == !GeneratesPerFirstLaneOnly || 75606bb8c9fSFlorian Hahn State.VF.isScalar()) && 75706bb8c9fSFlorian Hahn "scalar value but not only first lane defined"); 75857f5d8f2SFlorian Hahn State.set(this, GeneratedValue, 75906bb8c9fSFlorian Hahn /*IsScalar*/ GeneratesPerFirstLaneOnly); 7602265bb06SFlorian Hahn } 761911055e3SFlorian Hahn 762f0d5104cSLuke Lau bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { 763f0d5104cSLuke Lau if (Instruction::isBinaryOp(getOpcode())) 764f0d5104cSLuke Lau return false; 765f0d5104cSLuke Lau switch (getOpcode()) { 766f0d5104cSLuke Lau case Instruction::ICmp: 767f0d5104cSLuke Lau case Instruction::Select: 768f0d5104cSLuke Lau case VPInstruction::AnyOf: 769f0d5104cSLuke Lau case VPInstruction::CalculateTripCountMinusVF: 770f0d5104cSLuke Lau case VPInstruction::CanonicalIVIncrementForPart: 771f0d5104cSLuke Lau case VPInstruction::ExtractFromEnd: 772f0d5104cSLuke Lau case VPInstruction::FirstOrderRecurrenceSplice: 773f0d5104cSLuke Lau case VPInstruction::LogicalAnd: 774f0d5104cSLuke Lau case VPInstruction::Not: 775f0d5104cSLuke Lau case VPInstruction::PtrAdd: 776f0d5104cSLuke Lau return false; 777f0d5104cSLuke Lau default: 778f0d5104cSLuke Lau return true; 779f0d5104cSLuke Lau } 780f0d5104cSLuke Lau } 781f0d5104cSLuke Lau 78247abbf4fSFlorian Hahn bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { 78347abbf4fSFlorian Hahn assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); 78447abbf4fSFlorian Hahn if (Instruction::isBinaryOp(getOpcode())) 78547abbf4fSFlorian Hahn return vputils::onlyFirstLaneUsed(this); 78647abbf4fSFlorian Hahn 78747abbf4fSFlorian Hahn switch (getOpcode()) { 78847abbf4fSFlorian Hahn default: 78947abbf4fSFlorian Hahn return false; 79047abbf4fSFlorian Hahn case Instruction::ICmp: 791f148d579SAlexey Bataev case Instruction::Select: 7925fae408dSFlorian Hahn case Instruction::Or: 79306bb8c9fSFlorian Hahn case VPInstruction::PtrAdd: 79447abbf4fSFlorian Hahn // TODO: Cover additional opcodes. 79547abbf4fSFlorian Hahn return vputils::onlyFirstLaneUsed(this); 79647abbf4fSFlorian Hahn case VPInstruction::ActiveLaneMask: 797413a66f3SAlexey Bataev case VPInstruction::ExplicitVectorLength: 79847abbf4fSFlorian Hahn case VPInstruction::CalculateTripCountMinusVF: 79947abbf4fSFlorian Hahn case VPInstruction::CanonicalIVIncrementForPart: 80047abbf4fSFlorian Hahn case VPInstruction::BranchOnCount: 80199d6c6d9SFlorian Hahn case VPInstruction::BranchOnCond: 8029a5a8731SFlorian Hahn case VPInstruction::ResumePhi: 803911055e3SFlorian Hahn return true; 80447abbf4fSFlorian Hahn }; 80547abbf4fSFlorian Hahn llvm_unreachable("switch should return"); 80647abbf4fSFlorian Hahn } 80703975b7fSFlorian Hahn 8082f4ebf85SFlorian Hahn bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const { 8092f4ebf85SFlorian Hahn assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); 8102f4ebf85SFlorian Hahn if (Instruction::isBinaryOp(getOpcode())) 8112f4ebf85SFlorian Hahn return vputils::onlyFirstPartUsed(this); 8122f4ebf85SFlorian Hahn 8132f4ebf85SFlorian Hahn switch (getOpcode()) { 8142f4ebf85SFlorian Hahn default: 8152f4ebf85SFlorian Hahn return false; 8162f4ebf85SFlorian Hahn case Instruction::ICmp: 8172f4ebf85SFlorian Hahn case Instruction::Select: 8182f4ebf85SFlorian Hahn return vputils::onlyFirstPartUsed(this); 8192f4ebf85SFlorian Hahn case VPInstruction::BranchOnCount: 8202f4ebf85SFlorian Hahn case VPInstruction::BranchOnCond: 8212f4ebf85SFlorian Hahn case VPInstruction::CanonicalIVIncrementForPart: 8222f4ebf85SFlorian Hahn return true; 8232f4ebf85SFlorian Hahn }; 8242f4ebf85SFlorian Hahn llvm_unreachable("switch should return"); 8252f4ebf85SFlorian Hahn } 8262f4ebf85SFlorian Hahn 82703975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 82803975b7fSFlorian Hahn void VPInstruction::dump() const { 82903975b7fSFlorian Hahn VPSlotTracker SlotTracker(getParent()->getPlan()); 83003975b7fSFlorian Hahn print(dbgs(), "", SlotTracker); 83103975b7fSFlorian Hahn } 83203975b7fSFlorian Hahn 83303975b7fSFlorian Hahn void VPInstruction::print(raw_ostream &O, const Twine &Indent, 83403975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 83503975b7fSFlorian Hahn O << Indent << "EMIT "; 83603975b7fSFlorian Hahn 83703975b7fSFlorian Hahn if (hasResult()) { 83803975b7fSFlorian Hahn printAsOperand(O, SlotTracker); 83903975b7fSFlorian Hahn O << " = "; 84003975b7fSFlorian Hahn } 84103975b7fSFlorian Hahn 84203975b7fSFlorian Hahn switch (getOpcode()) { 84303975b7fSFlorian Hahn case VPInstruction::Not: 84403975b7fSFlorian Hahn O << "not"; 84503975b7fSFlorian Hahn break; 84603975b7fSFlorian Hahn case VPInstruction::SLPLoad: 84703975b7fSFlorian Hahn O << "combined load"; 84803975b7fSFlorian Hahn break; 84903975b7fSFlorian Hahn case VPInstruction::SLPStore: 85003975b7fSFlorian Hahn O << "combined store"; 85103975b7fSFlorian Hahn break; 85203975b7fSFlorian Hahn case VPInstruction::ActiveLaneMask: 85303975b7fSFlorian Hahn O << "active lane mask"; 85403975b7fSFlorian Hahn break; 8559a5a8731SFlorian Hahn case VPInstruction::ResumePhi: 8569a5a8731SFlorian Hahn O << "resume-phi"; 8579a5a8731SFlorian Hahn break; 858413a66f3SAlexey Bataev case VPInstruction::ExplicitVectorLength: 859413a66f3SAlexey Bataev O << "EXPLICIT-VECTOR-LENGTH"; 860413a66f3SAlexey Bataev break; 86103975b7fSFlorian Hahn case VPInstruction::FirstOrderRecurrenceSplice: 86203975b7fSFlorian Hahn O << "first-order splice"; 86303975b7fSFlorian Hahn break; 86403975b7fSFlorian Hahn case VPInstruction::BranchOnCond: 86503975b7fSFlorian Hahn O << "branch-on-cond"; 86603975b7fSFlorian Hahn break; 867fe1b51ffSSander de Smalen case VPInstruction::CalculateTripCountMinusVF: 868fe1b51ffSSander de Smalen O << "TC > VF ? TC - VF : 0"; 869fe1b51ffSSander de Smalen break; 87003fee671SDavid Sherwood case VPInstruction::CanonicalIVIncrementForPart: 87103fee671SDavid Sherwood O << "VF * Part +"; 87203fee671SDavid Sherwood break; 87303975b7fSFlorian Hahn case VPInstruction::BranchOnCount: 87403975b7fSFlorian Hahn O << "branch-on-count"; 87503975b7fSFlorian Hahn break; 87607b33013SFlorian Hahn case VPInstruction::ExtractFromEnd: 87707b33013SFlorian Hahn O << "extract-from-end"; 87807b33013SFlorian Hahn break; 879241fe837SFlorian Hahn case VPInstruction::ComputeReductionResult: 880241fe837SFlorian Hahn O << "compute-reduction-result"; 881241fe837SFlorian Hahn break; 882632317e9SFlorian Hahn case VPInstruction::LogicalAnd: 883632317e9SFlorian Hahn O << "logical-and"; 884632317e9SFlorian Hahn break; 88506bb8c9fSFlorian Hahn case VPInstruction::PtrAdd: 88606bb8c9fSFlorian Hahn O << "ptradd"; 88706bb8c9fSFlorian Hahn break; 8885fae408dSFlorian Hahn case VPInstruction::AnyOf: 8895fae408dSFlorian Hahn O << "any-of"; 8905fae408dSFlorian Hahn break; 89103975b7fSFlorian Hahn default: 89203975b7fSFlorian Hahn O << Instruction::getOpcodeName(getOpcode()); 89303975b7fSFlorian Hahn } 89403975b7fSFlorian Hahn 895af635a55SFlorian Hahn printFlags(O); 89693c5bae0SFlorian Hahn printOperands(O, SlotTracker); 89703975b7fSFlorian Hahn 898165e24aaSFlorian Hahn if (auto DL = getDebugLoc()) { 89903975b7fSFlorian Hahn O << ", !dbg "; 90003975b7fSFlorian Hahn DL.print(O); 90103975b7fSFlorian Hahn } 90203975b7fSFlorian Hahn } 90303975b7fSFlorian Hahn #endif 90403975b7fSFlorian Hahn 905f0c5caa8SFlorian Hahn void VPIRInstruction::execute(VPTransformState &State) { 906f0c5caa8SFlorian Hahn assert((isa<PHINode>(&I) || getNumOperands() == 0) && 907f0c5caa8SFlorian Hahn "Only PHINodes can have extra operands"); 9085fae408dSFlorian Hahn for (const auto &[Idx, Op] : enumerate(operands())) { 9095fae408dSFlorian Hahn VPValue *ExitValue = Op; 910f0c5caa8SFlorian Hahn auto Lane = vputils::isUniformAfterVectorization(ExitValue) 911f0c5caa8SFlorian Hahn ? VPLane::getFirstLane() 912f0c5caa8SFlorian Hahn : VPLane::getLastLaneForVF(State.VF); 9135fae408dSFlorian Hahn VPBlockBase *Pred = getParent()->getPredecessors()[Idx]; 9145fae408dSFlorian Hahn auto *PredVPBB = Pred->getExitingBasicBlock(); 915f0c5caa8SFlorian Hahn BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; 916f0c5caa8SFlorian Hahn // Set insertion point in PredBB in case an extract needs to be generated. 917f0c5caa8SFlorian Hahn // TODO: Model extracts explicitly. 918f0c5caa8SFlorian Hahn State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt()); 919aae7ac66SFlorian Hahn Value *V = State.get(ExitValue, VPLane(Lane)); 920f0c5caa8SFlorian Hahn auto *Phi = cast<PHINode>(&I); 921b021464dSFlorian Hahn // If there is no existing block for PredBB in the phi, add a new incoming 922b021464dSFlorian Hahn // value. Otherwise update the existing incoming value for PredBB. 923b021464dSFlorian Hahn if (Phi->getBasicBlockIndex(PredBB) == -1) 924f0c5caa8SFlorian Hahn Phi->addIncoming(V, PredBB); 925b021464dSFlorian Hahn else 926b021464dSFlorian Hahn Phi->setIncomingValueForBlock(PredBB, V); 927f0c5caa8SFlorian Hahn } 928f0c5caa8SFlorian Hahn 929f0c5caa8SFlorian Hahn // Advance the insert point after the wrapped IR instruction. This allows 930f0c5caa8SFlorian Hahn // interleaving VPIRInstructions and other recipes. 931f0c5caa8SFlorian Hahn State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator())); 932f0c5caa8SFlorian Hahn } 933f0c5caa8SFlorian Hahn 934fa3258ecSFlorian Hahn InstructionCost VPIRInstruction::computeCost(ElementCount VF, 935fa3258ecSFlorian Hahn VPCostContext &Ctx) const { 936fa3258ecSFlorian Hahn // The recipe wraps an existing IR instruction on the border of VPlan's scope, 937fa3258ecSFlorian Hahn // hence it does not contribute to the cost-modeling for the VPlan. 938fa3258ecSFlorian Hahn return 0; 939fa3258ecSFlorian Hahn } 940fa3258ecSFlorian Hahn 94109a29fccSFlorian Hahn void VPIRInstruction::extractLastLaneOfOperand(VPBuilder &Builder) { 94209a29fccSFlorian Hahn assert(isa<PHINode>(getInstruction()) && 94309a29fccSFlorian Hahn "can only add exiting operands to phi nodes"); 94409a29fccSFlorian Hahn assert(getNumOperands() == 1 && "must have a single operand"); 94509a29fccSFlorian Hahn VPValue *Exiting = getOperand(0); 94609a29fccSFlorian Hahn if (!Exiting->isLiveIn()) { 94709a29fccSFlorian Hahn LLVMContext &Ctx = getInstruction().getContext(); 94809a29fccSFlorian Hahn auto &Plan = *getParent()->getPlan(); 94909a29fccSFlorian Hahn Exiting = Builder.createNaryOp( 95009a29fccSFlorian Hahn VPInstruction::ExtractFromEnd, 95109a29fccSFlorian Hahn {Exiting, 95209a29fccSFlorian Hahn Plan.getOrAddLiveIn(ConstantInt::get(IntegerType::get(Ctx, 32), 1))}); 95309a29fccSFlorian Hahn } 95409a29fccSFlorian Hahn setOperand(0, Exiting); 95509a29fccSFlorian Hahn } 95609a29fccSFlorian Hahn 957f0c5caa8SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 958f0c5caa8SFlorian Hahn void VPIRInstruction::print(raw_ostream &O, const Twine &Indent, 959f0c5caa8SFlorian Hahn VPSlotTracker &SlotTracker) const { 960f0c5caa8SFlorian Hahn O << Indent << "IR " << I; 961f0c5caa8SFlorian Hahn 962f0c5caa8SFlorian Hahn if (getNumOperands() != 0) { 9635fae408dSFlorian Hahn O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": "; 9645fae408dSFlorian Hahn interleaveComma( 9655fae408dSFlorian Hahn enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) { 9665fae408dSFlorian Hahn Op.value()->printAsOperand(O, SlotTracker); 967e2519b67SFlorian Hahn O << " from "; 9685fae408dSFlorian Hahn getParent()->getPredecessors()[Op.index()]->printAsOperand(O); 9695fae408dSFlorian Hahn }); 970f0c5caa8SFlorian Hahn O << ")"; 971f0c5caa8SFlorian Hahn } 972f0c5caa8SFlorian Hahn } 973f0c5caa8SFlorian Hahn #endif 974f0c5caa8SFlorian Hahn 975408ebe5eSFlorian Hahn void VPWidenCallRecipe::execute(VPTransformState &State) { 9768bd02e5aSFlorian Hahn assert(State.VF.isVector() && "not widening"); 97742fb1facSFlorian Hahn State.setDebugLocFrom(getDebugLoc()); 978408ebe5eSFlorian Hahn 9796fbbe152SFlorian Hahn FunctionType *VFTy = Variant->getFunctionType(); 980593e25ffSJay Foad // Add return type if intrinsic is overloaded on it. 981408ebe5eSFlorian Hahn SmallVector<Value *, 4> Args; 982e846778eSFlorian Hahn for (const auto &I : enumerate(arg_operands())) { 983408ebe5eSFlorian Hahn Value *Arg; 984d4c01714SGraham Hunter // Some vectorized function variants may also take a scalar argument, 985d4c01714SGraham Hunter // e.g. linear parameters for pointers. This needs to be the scalar value 986d4c01714SGraham Hunter // from the start of the respective part when interleaving. 9876fbbe152SFlorian Hahn if (!VFTy->getParamType(I.index())->isVectorTy()) 988aae7ac66SFlorian Hahn Arg = State.get(I.value(), VPLane(0)); 9894d64a2bcSGraham Hunter else 9906fbbe152SFlorian Hahn Arg = State.get(I.value(), onlyFirstLaneUsed(I.value())); 9916fbbe152SFlorian Hahn Args.push_back(Arg); 9926fbbe152SFlorian Hahn } 9936fbbe152SFlorian Hahn 9946fbbe152SFlorian Hahn assert(Variant != nullptr && "Can't create vector function."); 9956fbbe152SFlorian Hahn 9966fbbe152SFlorian Hahn auto *CI = cast_or_null<CallInst>(getUnderlyingValue()); 9976fbbe152SFlorian Hahn SmallVector<OperandBundleDef, 1> OpBundles; 9986fbbe152SFlorian Hahn if (CI) 9996fbbe152SFlorian Hahn CI->getOperandBundlesAsDefs(OpBundles); 10006fbbe152SFlorian Hahn 10016fbbe152SFlorian Hahn CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles); 10026fbbe152SFlorian Hahn setFlags(V); 10036fbbe152SFlorian Hahn 10046fbbe152SFlorian Hahn if (!V->getType()->isVoidTy()) 10056fbbe152SFlorian Hahn State.set(this, V); 10066fbbe152SFlorian Hahn State.addMetadata(V, CI); 10076fbbe152SFlorian Hahn } 10086fbbe152SFlorian Hahn 10096fbbe152SFlorian Hahn InstructionCost VPWidenCallRecipe::computeCost(ElementCount VF, 10106fbbe152SFlorian Hahn VPCostContext &Ctx) const { 10116fbbe152SFlorian Hahn return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(), 10126fbbe152SFlorian Hahn Variant->getFunctionType()->params(), 1013edf3a55bSJohn Brawn Ctx.CostKind); 10146fbbe152SFlorian Hahn } 10156fbbe152SFlorian Hahn 10166fbbe152SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 10176fbbe152SFlorian Hahn void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent, 10186fbbe152SFlorian Hahn VPSlotTracker &SlotTracker) const { 10196fbbe152SFlorian Hahn O << Indent << "WIDEN-CALL "; 10206fbbe152SFlorian Hahn 10216fbbe152SFlorian Hahn Function *CalledFn = getCalledScalarFunction(); 10226fbbe152SFlorian Hahn if (CalledFn->getReturnType()->isVoidTy()) 10236fbbe152SFlorian Hahn O << "void "; 10246fbbe152SFlorian Hahn else { 10256fbbe152SFlorian Hahn printAsOperand(O, SlotTracker); 10266fbbe152SFlorian Hahn O << " = "; 10276fbbe152SFlorian Hahn } 10286fbbe152SFlorian Hahn 10296fbbe152SFlorian Hahn O << "call"; 10306fbbe152SFlorian Hahn printFlags(O); 10316fbbe152SFlorian Hahn O << " @" << CalledFn->getName() << "("; 10326fbbe152SFlorian Hahn interleaveComma(arg_operands(), O, [&O, &SlotTracker](VPValue *Op) { 10336fbbe152SFlorian Hahn Op->printAsOperand(O, SlotTracker); 10346fbbe152SFlorian Hahn }); 10356fbbe152SFlorian Hahn O << ")"; 10366fbbe152SFlorian Hahn 10376fbbe152SFlorian Hahn O << " (using library function"; 10386fbbe152SFlorian Hahn if (Variant->hasName()) 10396fbbe152SFlorian Hahn O << ": " << Variant->getName(); 10406fbbe152SFlorian Hahn O << ")"; 10416fbbe152SFlorian Hahn } 10426fbbe152SFlorian Hahn #endif 10436fbbe152SFlorian Hahn 10446fbbe152SFlorian Hahn void VPWidenIntrinsicRecipe::execute(VPTransformState &State) { 10456fbbe152SFlorian Hahn assert(State.VF.isVector() && "not widening"); 10466fbbe152SFlorian Hahn State.setDebugLocFrom(getDebugLoc()); 10476fbbe152SFlorian Hahn 10486fbbe152SFlorian Hahn SmallVector<Type *, 2> TysForDecl; 10496fbbe152SFlorian Hahn // Add return type if intrinsic is overloaded on it. 10508663b877SFinn Plummer if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI)) 10516fbbe152SFlorian Hahn TysForDecl.push_back(VectorType::get(getResultType(), State.VF)); 10526fbbe152SFlorian Hahn SmallVector<Value *, 4> Args; 10536fbbe152SFlorian Hahn for (const auto &I : enumerate(operands())) { 10546fbbe152SFlorian Hahn // Some intrinsics have a scalar argument - don't replace it with a 10556fbbe152SFlorian Hahn // vector. 10566fbbe152SFlorian Hahn Value *Arg; 105745c01e8aSFinn Plummer if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(), 105845c01e8aSFinn Plummer State.TTI)) 10596fbbe152SFlorian Hahn Arg = State.get(I.value(), VPLane(0)); 10606fbbe152SFlorian Hahn else 10616fbbe152SFlorian Hahn Arg = State.get(I.value(), onlyFirstLaneUsed(I.value())); 10628663b877SFinn Plummer if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(), 10638663b877SFinn Plummer State.TTI)) 1064408ebe5eSFlorian Hahn TysForDecl.push_back(Arg->getType()); 1065408ebe5eSFlorian Hahn Args.push_back(Arg); 1066408ebe5eSFlorian Hahn } 1067408ebe5eSFlorian Hahn 1068408ebe5eSFlorian Hahn // Use vector version of the intrinsic. 1069408ebe5eSFlorian Hahn Module *M = State.Builder.GetInsertBlock()->getModule(); 10706fbbe152SFlorian Hahn Function *VectorF = 1071fa789dffSRahul Joshi Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl); 1072b759020cSLiqinWeng assert(VectorF && 1073b759020cSLiqinWeng "Can't retrieve vector intrinsic or vector-predication intrinsics."); 10740fa5df19SGraham Hunter 10756fbbe152SFlorian Hahn auto *CI = cast_or_null<CallInst>(getUnderlyingValue()); 1076408ebe5eSFlorian Hahn SmallVector<OperandBundleDef, 1> OpBundles; 1077e846778eSFlorian Hahn if (CI) 1078e846778eSFlorian Hahn CI->getOperandBundlesAsDefs(OpBundles); 1079e846778eSFlorian Hahn 1080408ebe5eSFlorian Hahn CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles); 1081408ebe5eSFlorian Hahn 10820344123fSFlorian Hahn setFlags(V); 1083408ebe5eSFlorian Hahn 1084cd160a6eSFlorian Hahn if (!V->getType()->isVoidTy()) 108557f5d8f2SFlorian Hahn State.set(this, V); 1086e846778eSFlorian Hahn State.addMetadata(V, CI); 1087408ebe5eSFlorian Hahn } 1088408ebe5eSFlorian Hahn 10896fbbe152SFlorian Hahn InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF, 10909ccf8254SFlorian Hahn VPCostContext &Ctx) const { 1091b0de7fa4SFlorian Hahn // Some backends analyze intrinsic arguments to determine cost. Use the 1092b0de7fa4SFlorian Hahn // underlying value for the operand if it has one. Otherwise try to use the 1093b0de7fa4SFlorian Hahn // operand of the underlying call instruction, if there is one. Otherwise 1094b0de7fa4SFlorian Hahn // clear Arguments. 1095b0de7fa4SFlorian Hahn // TODO: Rework TTI interface to be independent of concrete IR values. 10969ccf8254SFlorian Hahn SmallVector<const Value *> Arguments; 1097b0de7fa4SFlorian Hahn for (const auto &[Idx, Op] : enumerate(operands())) { 10989ccf8254SFlorian Hahn auto *V = Op->getUnderlyingValue(); 10999ccf8254SFlorian Hahn if (!V) { 11004a3f46deSLiqinWeng // Push all the VP Intrinsic's ops into the Argments even if is nullptr. 11014a3f46deSLiqinWeng // Some VP Intrinsic's cost will assert the number of parameters. 11024a3f46deSLiqinWeng // Mainly appears in the following two scenarios: 11034a3f46deSLiqinWeng // 1. EVL Op is nullptr 11044a3f46deSLiqinWeng // 2. The Argmunt of the VP Intrinsic is also the VP Intrinsic 11054a3f46deSLiqinWeng if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) { 11064a3f46deSLiqinWeng Arguments.push_back(V); 11074a3f46deSLiqinWeng continue; 11084a3f46deSLiqinWeng } 1109b0de7fa4SFlorian Hahn if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) { 1110b0de7fa4SFlorian Hahn Arguments.push_back(UI->getArgOperand(Idx)); 1111b0de7fa4SFlorian Hahn continue; 1112b0de7fa4SFlorian Hahn } 11139ccf8254SFlorian Hahn Arguments.clear(); 11149ccf8254SFlorian Hahn break; 11159ccf8254SFlorian Hahn } 11169ccf8254SFlorian Hahn Arguments.push_back(V); 11179ccf8254SFlorian Hahn } 11189ccf8254SFlorian Hahn 11199ab5474eSBenjamin Maxwell Type *RetTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); 11209ccf8254SFlorian Hahn SmallVector<Type *> ParamTys; 11219ccf8254SFlorian Hahn for (unsigned I = 0; I != getNumOperands(); ++I) 11229ccf8254SFlorian Hahn ParamTys.push_back( 11239ab5474eSBenjamin Maxwell toVectorTy(Ctx.Types.inferScalarType(getOperand(I)), VF)); 11249ccf8254SFlorian Hahn 112550a02e7cSFlorian Hahn // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst. 11260344123fSFlorian Hahn FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags(); 112750a02e7cSFlorian Hahn IntrinsicCostAttributes CostAttrs( 112850a02e7cSFlorian Hahn VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF, 112950a02e7cSFlorian Hahn dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue())); 1130edf3a55bSJohn Brawn return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, Ctx.CostKind); 11319ccf8254SFlorian Hahn } 11329ccf8254SFlorian Hahn 11336fbbe152SFlorian Hahn StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const { 11346fbbe152SFlorian Hahn return Intrinsic::getBaseName(VectorIntrinsicID); 11356fbbe152SFlorian Hahn } 113603975b7fSFlorian Hahn 113734cdd67cSFlorian Hahn bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const { 113834cdd67cSFlorian Hahn assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); 113934cdd67cSFlorian Hahn // Vector predication intrinsics only demand the the first lane the last 114034cdd67cSFlorian Hahn // operand (the EVL operand). 114134cdd67cSFlorian Hahn return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) && 114234cdd67cSFlorian Hahn Op == getOperand(getNumOperands() - 1); 114334cdd67cSFlorian Hahn } 114434cdd67cSFlorian Hahn 11456fbbe152SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 11466fbbe152SFlorian Hahn void VPWidenIntrinsicRecipe::print(raw_ostream &O, const Twine &Indent, 11476fbbe152SFlorian Hahn VPSlotTracker &SlotTracker) const { 11486fbbe152SFlorian Hahn O << Indent << "WIDEN-INTRINSIC "; 11496fbbe152SFlorian Hahn if (ResultTy->isVoidTy()) { 115003975b7fSFlorian Hahn O << "void "; 11516fbbe152SFlorian Hahn } else { 115203975b7fSFlorian Hahn printAsOperand(O, SlotTracker); 115303975b7fSFlorian Hahn O << " = "; 115403975b7fSFlorian Hahn } 115503975b7fSFlorian Hahn 11560344123fSFlorian Hahn O << "call"; 11570344123fSFlorian Hahn printFlags(O); 11586fbbe152SFlorian Hahn O << getIntrinsicName() << "("; 11596fbbe152SFlorian Hahn 11606fbbe152SFlorian Hahn interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) { 1161e846778eSFlorian Hahn Op->printAsOperand(O, SlotTracker); 1162e846778eSFlorian Hahn }); 116303975b7fSFlorian Hahn O << ")"; 116403975b7fSFlorian Hahn } 11656f1a8c2dSGraham Hunter #endif 11666f1a8c2dSGraham Hunter 11676f1a8c2dSGraham Hunter void VPHistogramRecipe::execute(VPTransformState &State) { 11686f1a8c2dSGraham Hunter State.setDebugLocFrom(getDebugLoc()); 11696f1a8c2dSGraham Hunter IRBuilderBase &Builder = State.Builder; 11706f1a8c2dSGraham Hunter 11716f1a8c2dSGraham Hunter Value *Address = State.get(getOperand(0)); 11726f1a8c2dSGraham Hunter Value *IncAmt = State.get(getOperand(1), /*IsScalar=*/true); 11736f1a8c2dSGraham Hunter VectorType *VTy = cast<VectorType>(Address->getType()); 11746f1a8c2dSGraham Hunter 11756f1a8c2dSGraham Hunter // The histogram intrinsic requires a mask even if the recipe doesn't; 11766f1a8c2dSGraham Hunter // if the mask operand was omitted then all lanes should be executed and 11776f1a8c2dSGraham Hunter // we just need to synthesize an all-true mask. 11786f1a8c2dSGraham Hunter Value *Mask = nullptr; 11796f1a8c2dSGraham Hunter if (VPValue *VPMask = getMask()) 11806f1a8c2dSGraham Hunter Mask = State.get(VPMask); 11816f1a8c2dSGraham Hunter else 11826f1a8c2dSGraham Hunter Mask = 11836f1a8c2dSGraham Hunter Builder.CreateVectorSplat(VTy->getElementCount(), Builder.getInt1(1)); 11846f1a8c2dSGraham Hunter 11856f1a8c2dSGraham Hunter // If this is a subtract, we want to invert the increment amount. We may 11866f1a8c2dSGraham Hunter // add a separate intrinsic in future, but for now we'll try this. 11876f1a8c2dSGraham Hunter if (Opcode == Instruction::Sub) 11886f1a8c2dSGraham Hunter IncAmt = Builder.CreateNeg(IncAmt); 11896f1a8c2dSGraham Hunter else 11906f1a8c2dSGraham Hunter assert(Opcode == Instruction::Add && "only add or sub supported for now"); 11916f1a8c2dSGraham Hunter 11926f1a8c2dSGraham Hunter State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add, 11936f1a8c2dSGraham Hunter {VTy, IncAmt->getType()}, 11946f1a8c2dSGraham Hunter {Address, IncAmt, Mask}); 11956f1a8c2dSGraham Hunter } 11966f1a8c2dSGraham Hunter 11976f1a8c2dSGraham Hunter InstructionCost VPHistogramRecipe::computeCost(ElementCount VF, 11986f1a8c2dSGraham Hunter VPCostContext &Ctx) const { 11996f1a8c2dSGraham Hunter // FIXME: Take the gather and scatter into account as well. For now we're 12006f1a8c2dSGraham Hunter // generating the same cost as the fallback path, but we'll likely 12016f1a8c2dSGraham Hunter // need to create a new TTI method for determining the cost, including 12026f1a8c2dSGraham Hunter // whether we can use base + vec-of-smaller-indices or just 12036f1a8c2dSGraham Hunter // vec-of-pointers. 12046f1a8c2dSGraham Hunter assert(VF.isVector() && "Invalid VF for histogram cost"); 12056f1a8c2dSGraham Hunter Type *AddressTy = Ctx.Types.inferScalarType(getOperand(0)); 12066f1a8c2dSGraham Hunter VPValue *IncAmt = getOperand(1); 12076f1a8c2dSGraham Hunter Type *IncTy = Ctx.Types.inferScalarType(IncAmt); 12086f1a8c2dSGraham Hunter VectorType *VTy = VectorType::get(IncTy, VF); 12096f1a8c2dSGraham Hunter 12106f1a8c2dSGraham Hunter // Assume that a non-constant update value (or a constant != 1) requires 12116f1a8c2dSGraham Hunter // a multiply, and add that into the cost. 12126f1a8c2dSGraham Hunter InstructionCost MulCost = 1213edf3a55bSJohn Brawn Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy, Ctx.CostKind); 12146f1a8c2dSGraham Hunter if (IncAmt->isLiveIn()) { 12156f1a8c2dSGraham Hunter ConstantInt *CI = dyn_cast<ConstantInt>(IncAmt->getLiveInIRValue()); 12166f1a8c2dSGraham Hunter 12176f1a8c2dSGraham Hunter if (CI && CI->getZExtValue() == 1) 12186f1a8c2dSGraham Hunter MulCost = TTI::TCC_Free; 12196f1a8c2dSGraham Hunter } 12206f1a8c2dSGraham Hunter 12216f1a8c2dSGraham Hunter // Find the cost of the histogram operation itself. 12226f1a8c2dSGraham Hunter Type *PtrTy = VectorType::get(AddressTy, VF); 12236f1a8c2dSGraham Hunter Type *MaskTy = VectorType::get(Type::getInt1Ty(Ctx.LLVMCtx), VF); 12246f1a8c2dSGraham Hunter IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add, 12256f1a8c2dSGraham Hunter Type::getVoidTy(Ctx.LLVMCtx), 12266f1a8c2dSGraham Hunter {PtrTy, IncTy, MaskTy}); 12276f1a8c2dSGraham Hunter 12286f1a8c2dSGraham Hunter // Add the costs together with the add/sub operation. 1229edf3a55bSJohn Brawn return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind) + MulCost + 1230edf3a55bSJohn Brawn Ctx.TTI.getArithmeticInstrCost(Opcode, VTy, Ctx.CostKind); 12316f1a8c2dSGraham Hunter } 12326f1a8c2dSGraham Hunter 12336f1a8c2dSGraham Hunter #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 12346f1a8c2dSGraham Hunter void VPHistogramRecipe::print(raw_ostream &O, const Twine &Indent, 12356f1a8c2dSGraham Hunter VPSlotTracker &SlotTracker) const { 12366f1a8c2dSGraham Hunter O << Indent << "WIDEN-HISTOGRAM buckets: "; 12376f1a8c2dSGraham Hunter getOperand(0)->printAsOperand(O, SlotTracker); 12386f1a8c2dSGraham Hunter 12396f1a8c2dSGraham Hunter if (Opcode == Instruction::Sub) 12406f1a8c2dSGraham Hunter O << ", dec: "; 12416f1a8c2dSGraham Hunter else { 12426f1a8c2dSGraham Hunter assert(Opcode == Instruction::Add); 12436f1a8c2dSGraham Hunter O << ", inc: "; 12446f1a8c2dSGraham Hunter } 12456f1a8c2dSGraham Hunter getOperand(1)->printAsOperand(O, SlotTracker); 12466f1a8c2dSGraham Hunter 12476f1a8c2dSGraham Hunter if (VPValue *Mask = getMask()) { 12486f1a8c2dSGraham Hunter O << ", mask: "; 12496f1a8c2dSGraham Hunter Mask->printAsOperand(O, SlotTracker); 12506f1a8c2dSGraham Hunter } 12516f1a8c2dSGraham Hunter } 125203975b7fSFlorian Hahn 125303975b7fSFlorian Hahn void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, 125403975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 125503975b7fSFlorian Hahn O << Indent << "WIDEN-SELECT "; 125603975b7fSFlorian Hahn printAsOperand(O, SlotTracker); 125703975b7fSFlorian Hahn O << " = select "; 12580294dab7SLiqinWeng printFlags(O); 125903975b7fSFlorian Hahn getOperand(0)->printAsOperand(O, SlotTracker); 126003975b7fSFlorian Hahn O << ", "; 126103975b7fSFlorian Hahn getOperand(1)->printAsOperand(O, SlotTracker); 126203975b7fSFlorian Hahn O << ", "; 126303975b7fSFlorian Hahn getOperand(2)->printAsOperand(O, SlotTracker); 126454558fd8SFlorian Hahn O << (isInvariantCond() ? " (condition is loop invariant)" : ""); 126503975b7fSFlorian Hahn } 12660c27b388SFlorian Hahn #endif 126703975b7fSFlorian Hahn 12680c27b388SFlorian Hahn void VPWidenSelectRecipe::execute(VPTransformState &State) { 1269165e24aaSFlorian Hahn State.setDebugLocFrom(getDebugLoc()); 12700c27b388SFlorian Hahn 12710c27b388SFlorian Hahn // The condition can be loop invariant but still defined inside the 12720c27b388SFlorian Hahn // loop. This means that we can't just use the original 'cond' value. 12730c27b388SFlorian Hahn // We have to take the 'vectorized' value and pick the first lane. 12740c27b388SFlorian Hahn // Instcombine will make this a no-op. 12750c27b388SFlorian Hahn auto *InvarCond = 1276aae7ac66SFlorian Hahn isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr; 12770c27b388SFlorian Hahn 127857f5d8f2SFlorian Hahn Value *Cond = InvarCond ? InvarCond : State.get(getCond()); 127957f5d8f2SFlorian Hahn Value *Op0 = State.get(getOperand(1)); 128057f5d8f2SFlorian Hahn Value *Op1 = State.get(getOperand(2)); 12810c27b388SFlorian Hahn Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1); 128257f5d8f2SFlorian Hahn State.set(this, Sel); 12830294dab7SLiqinWeng if (isa<FPMathOperator>(Sel)) 12840294dab7SLiqinWeng setFlags(cast<Instruction>(Sel)); 1285165e24aaSFlorian Hahn State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue())); 12860c27b388SFlorian Hahn } 12870c27b388SFlorian Hahn 12881d9b3222SFlorian Hahn InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF, 12891d9b3222SFlorian Hahn VPCostContext &Ctx) const { 12901d9b3222SFlorian Hahn SelectInst *SI = cast<SelectInst>(getUnderlyingValue()); 12911d9b3222SFlorian Hahn bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions(); 12921d9b3222SFlorian Hahn Type *ScalarTy = Ctx.Types.inferScalarType(this); 12939ab5474eSBenjamin Maxwell Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); 12941d9b3222SFlorian Hahn 12951d9b3222SFlorian Hahn VPValue *Op0, *Op1; 12961d9b3222SFlorian Hahn using namespace llvm::VPlanPatternMatch; 12971d9b3222SFlorian Hahn if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 && 12981d9b3222SFlorian Hahn (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) || 12991d9b3222SFlorian Hahn match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) { 13001d9b3222SFlorian Hahn // select x, y, false --> x & y 13011d9b3222SFlorian Hahn // select x, true, y --> x | y 13021d9b3222SFlorian Hahn const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0); 13031d9b3222SFlorian Hahn const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1); 13041d9b3222SFlorian Hahn 13051d9b3222SFlorian Hahn SmallVector<const Value *, 2> Operands; 13061d9b3222SFlorian Hahn if (all_of(operands(), 13071d9b3222SFlorian Hahn [](VPValue *Op) { return Op->getUnderlyingValue(); })) 13081d9b3222SFlorian Hahn Operands.append(SI->op_begin(), SI->op_end()); 13091d9b3222SFlorian Hahn bool IsLogicalOr = match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))); 13101d9b3222SFlorian Hahn return Ctx.TTI.getArithmeticInstrCost( 1311edf3a55bSJohn Brawn IsLogicalOr ? Instruction::Or : Instruction::And, VectorTy, 1312edf3a55bSJohn Brawn Ctx.CostKind, {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI); 13131d9b3222SFlorian Hahn } 13141d9b3222SFlorian Hahn 13151d9b3222SFlorian Hahn Type *CondTy = Ctx.Types.inferScalarType(getOperand(0)); 13161d9b3222SFlorian Hahn if (!ScalarCond) 13171d9b3222SFlorian Hahn CondTy = VectorType::get(CondTy, VF); 13181d9b3222SFlorian Hahn 13191d9b3222SFlorian Hahn CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE; 13201d9b3222SFlorian Hahn if (auto *Cmp = dyn_cast<CmpInst>(SI->getCondition())) 13211d9b3222SFlorian Hahn Pred = Cmp->getPredicate(); 1322edf3a55bSJohn Brawn return Ctx.TTI.getCmpSelInstrCost( 1323edf3a55bSJohn Brawn Instruction::Select, VectorTy, CondTy, Pred, Ctx.CostKind, 1324edf3a55bSJohn Brawn {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, SI); 13251d9b3222SFlorian Hahn } 13261d9b3222SFlorian Hahn 1327698ae660SFlorian Hahn VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy( 1328698ae660SFlorian Hahn const FastMathFlags &FMF) { 1329698ae660SFlorian Hahn AllowReassoc = FMF.allowReassoc(); 1330698ae660SFlorian Hahn NoNaNs = FMF.noNaNs(); 1331698ae660SFlorian Hahn NoInfs = FMF.noInfs(); 1332698ae660SFlorian Hahn NoSignedZeros = FMF.noSignedZeros(); 1333698ae660SFlorian Hahn AllowReciprocal = FMF.allowReciprocal(); 1334698ae660SFlorian Hahn AllowContract = FMF.allowContract(); 1335698ae660SFlorian Hahn ApproxFunc = FMF.approxFunc(); 1336698ae660SFlorian Hahn } 1337698ae660SFlorian Hahn 1338299f0ff6SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1339299f0ff6SFlorian Hahn void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const { 1340299f0ff6SFlorian Hahn switch (OpType) { 1341fd661957SFlorian Hahn case OperationType::Cmp: 1342fd661957SFlorian Hahn O << " " << CmpInst::getPredicateName(getPredicate()); 1343fd661957SFlorian Hahn break; 1344bbd1941aSFlorian Hahn case OperationType::DisjointOp: 1345bbd1941aSFlorian Hahn if (DisjointFlags.IsDisjoint) 1346bbd1941aSFlorian Hahn O << " disjoint"; 1347bbd1941aSFlorian Hahn break; 1348299f0ff6SFlorian Hahn case OperationType::PossiblyExactOp: 1349299f0ff6SFlorian Hahn if (ExactFlags.IsExact) 1350299f0ff6SFlorian Hahn O << " exact"; 1351299f0ff6SFlorian Hahn break; 1352299f0ff6SFlorian Hahn case OperationType::OverflowingBinOp: 1353299f0ff6SFlorian Hahn if (WrapFlags.HasNUW) 1354299f0ff6SFlorian Hahn O << " nuw"; 1355299f0ff6SFlorian Hahn if (WrapFlags.HasNSW) 1356299f0ff6SFlorian Hahn O << " nsw"; 1357299f0ff6SFlorian Hahn break; 1358299f0ff6SFlorian Hahn case OperationType::FPMathOp: 1359299f0ff6SFlorian Hahn getFastMathFlags().print(O); 1360299f0ff6SFlorian Hahn break; 1361299f0ff6SFlorian Hahn case OperationType::GEPOp: 136211571874SNikita Popov if (GEPFlags.isInBounds()) 1363299f0ff6SFlorian Hahn O << " inbounds"; 136411571874SNikita Popov else if (GEPFlags.hasNoUnsignedSignedWrap()) 136511571874SNikita Popov O << " nusw"; 136611571874SNikita Popov if (GEPFlags.hasNoUnsignedWrap()) 136711571874SNikita Popov O << " nuw"; 1368299f0ff6SFlorian Hahn break; 1369056367bbSAlexey Bataev case OperationType::NonNegOp: 1370056367bbSAlexey Bataev if (NonNegFlags.NonNeg) 1371056367bbSAlexey Bataev O << " nneg"; 1372056367bbSAlexey Bataev break; 1373299f0ff6SFlorian Hahn case OperationType::Other: 1374299f0ff6SFlorian Hahn break; 1375299f0ff6SFlorian Hahn } 1376af635a55SFlorian Hahn if (getNumOperands() > 0) 1377299f0ff6SFlorian Hahn O << " "; 1378299f0ff6SFlorian Hahn } 1379299f0ff6SFlorian Hahn #endif 1380299f0ff6SFlorian Hahn 138113ae2134SFlorian Hahn void VPWidenRecipe::execute(VPTransformState &State) { 1382165e24aaSFlorian Hahn State.setDebugLocFrom(getDebugLoc()); 138313ae2134SFlorian Hahn auto &Builder = State.Builder; 1384785e7063SFlorian Hahn switch (Opcode) { 138513ae2134SFlorian Hahn case Instruction::Call: 138613ae2134SFlorian Hahn case Instruction::Br: 138713ae2134SFlorian Hahn case Instruction::PHI: 138813ae2134SFlorian Hahn case Instruction::GetElementPtr: 138913ae2134SFlorian Hahn case Instruction::Select: 139013ae2134SFlorian Hahn llvm_unreachable("This instruction is handled by a different recipe."); 139113ae2134SFlorian Hahn case Instruction::UDiv: 139213ae2134SFlorian Hahn case Instruction::SDiv: 139313ae2134SFlorian Hahn case Instruction::SRem: 139413ae2134SFlorian Hahn case Instruction::URem: 139513ae2134SFlorian Hahn case Instruction::Add: 139613ae2134SFlorian Hahn case Instruction::FAdd: 139713ae2134SFlorian Hahn case Instruction::Sub: 139813ae2134SFlorian Hahn case Instruction::FSub: 139913ae2134SFlorian Hahn case Instruction::FNeg: 140013ae2134SFlorian Hahn case Instruction::Mul: 140113ae2134SFlorian Hahn case Instruction::FMul: 140213ae2134SFlorian Hahn case Instruction::FDiv: 140313ae2134SFlorian Hahn case Instruction::FRem: 140413ae2134SFlorian Hahn case Instruction::Shl: 140513ae2134SFlorian Hahn case Instruction::LShr: 140613ae2134SFlorian Hahn case Instruction::AShr: 140713ae2134SFlorian Hahn case Instruction::And: 140813ae2134SFlorian Hahn case Instruction::Or: 140913ae2134SFlorian Hahn case Instruction::Xor: { 141013ae2134SFlorian Hahn // Just widen unops and binops. 141113ae2134SFlorian Hahn SmallVector<Value *, 2> Ops; 141213ae2134SFlorian Hahn for (VPValue *VPOp : operands()) 141357f5d8f2SFlorian Hahn Ops.push_back(State.get(VPOp)); 141413ae2134SFlorian Hahn 1415785e7063SFlorian Hahn Value *V = Builder.CreateNAryOp(Opcode, Ops); 141613ae2134SFlorian Hahn 1417127b00b2SFlorian Hahn if (auto *VecOp = dyn_cast<Instruction>(V)) 1418127b00b2SFlorian Hahn setFlags(VecOp); 141913ae2134SFlorian Hahn 142013ae2134SFlorian Hahn // Use this vector value for all users of the original instruction. 142157f5d8f2SFlorian Hahn State.set(this, V); 1422785e7063SFlorian Hahn State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue())); 142313ae2134SFlorian Hahn break; 142413ae2134SFlorian Hahn } 142513ae2134SFlorian Hahn case Instruction::Freeze: { 142657f5d8f2SFlorian Hahn Value *Op = State.get(getOperand(0)); 142713ae2134SFlorian Hahn 142813ae2134SFlorian Hahn Value *Freeze = Builder.CreateFreeze(Op); 142957f5d8f2SFlorian Hahn State.set(this, Freeze); 143013ae2134SFlorian Hahn break; 143113ae2134SFlorian Hahn } 143213ae2134SFlorian Hahn case Instruction::ICmp: 143313ae2134SFlorian Hahn case Instruction::FCmp: { 143413ae2134SFlorian Hahn // Widen compares. Generate vector compares. 1435785e7063SFlorian Hahn bool FCmp = Opcode == Instruction::FCmp; 143657f5d8f2SFlorian Hahn Value *A = State.get(getOperand(0)); 143757f5d8f2SFlorian Hahn Value *B = State.get(getOperand(1)); 143813ae2134SFlorian Hahn Value *C = nullptr; 143913ae2134SFlorian Hahn if (FCmp) { 144013ae2134SFlorian Hahn // Propagate fast math flags. 1441a77346baSYingwei Zheng C = Builder.CreateFCmpFMF( 1442a77346baSYingwei Zheng getPredicate(), A, B, 1443a77346baSYingwei Zheng dyn_cast_or_null<Instruction>(getUnderlyingValue())); 144413ae2134SFlorian Hahn } else { 1445785e7063SFlorian Hahn C = Builder.CreateICmp(getPredicate(), A, B); 144613ae2134SFlorian Hahn } 144757f5d8f2SFlorian Hahn State.set(this, C); 1448785e7063SFlorian Hahn State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue())); 144913ae2134SFlorian Hahn break; 145013ae2134SFlorian Hahn } 145113ae2134SFlorian Hahn default: 145213ae2134SFlorian Hahn // This instruction is not vectorized by simple widening. 1453785e7063SFlorian Hahn LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : " 1454785e7063SFlorian Hahn << Instruction::getOpcodeName(Opcode)); 145513ae2134SFlorian Hahn llvm_unreachable("Unhandled instruction!"); 145613ae2134SFlorian Hahn } // end of switch. 1457b0b88643SFlorian Hahn 1458b0b88643SFlorian Hahn #if !defined(NDEBUG) 1459b0b88643SFlorian Hahn // Verify that VPlan type inference results agree with the type of the 1460b0b88643SFlorian Hahn // generated values. 146106c3a7d2SFlorian Hahn assert(VectorType::get(State.TypeAnalysis.inferScalarType(this), State.VF) == 146257f5d8f2SFlorian Hahn State.get(this)->getType() && 1463b0b88643SFlorian Hahn "inferred type and type from generated instructions do not match"); 1464b0b88643SFlorian Hahn #endif 1465b0b88643SFlorian Hahn } 1466b0b88643SFlorian Hahn 14671aa8a6f6SFlorian Hahn InstructionCost VPWidenRecipe::computeCost(ElementCount VF, 14681aa8a6f6SFlorian Hahn VPCostContext &Ctx) const { 14691aa8a6f6SFlorian Hahn switch (Opcode) { 14701aa8a6f6SFlorian Hahn case Instruction::FNeg: { 14719ab5474eSBenjamin Maxwell Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); 14721aa8a6f6SFlorian Hahn return Ctx.TTI.getArithmeticInstrCost( 1473edf3a55bSJohn Brawn Opcode, VectorTy, Ctx.CostKind, 14741aa8a6f6SFlorian Hahn {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}, 14751aa8a6f6SFlorian Hahn {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}); 14761aa8a6f6SFlorian Hahn } 14771aa8a6f6SFlorian Hahn 14781aa8a6f6SFlorian Hahn case Instruction::UDiv: 14791aa8a6f6SFlorian Hahn case Instruction::SDiv: 14801aa8a6f6SFlorian Hahn case Instruction::SRem: 14811aa8a6f6SFlorian Hahn case Instruction::URem: 14821aa8a6f6SFlorian Hahn // More complex computation, let the legacy cost-model handle this for now. 14831aa8a6f6SFlorian Hahn return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF); 14841aa8a6f6SFlorian Hahn case Instruction::Add: 14851aa8a6f6SFlorian Hahn case Instruction::FAdd: 14861aa8a6f6SFlorian Hahn case Instruction::Sub: 14871aa8a6f6SFlorian Hahn case Instruction::FSub: 14881aa8a6f6SFlorian Hahn case Instruction::Mul: 14891aa8a6f6SFlorian Hahn case Instruction::FMul: 14901aa8a6f6SFlorian Hahn case Instruction::FDiv: 14911aa8a6f6SFlorian Hahn case Instruction::FRem: 14921aa8a6f6SFlorian Hahn case Instruction::Shl: 14931aa8a6f6SFlorian Hahn case Instruction::LShr: 14941aa8a6f6SFlorian Hahn case Instruction::AShr: 14951aa8a6f6SFlorian Hahn case Instruction::And: 14961aa8a6f6SFlorian Hahn case Instruction::Or: 14971aa8a6f6SFlorian Hahn case Instruction::Xor: { 14981aa8a6f6SFlorian Hahn VPValue *RHS = getOperand(1); 14991aa8a6f6SFlorian Hahn // Certain instructions can be cheaper to vectorize if they have a constant 15001aa8a6f6SFlorian Hahn // second vector operand. One example of this are shifts on x86. 15011aa8a6f6SFlorian Hahn TargetTransformInfo::OperandValueInfo RHSInfo = { 15021aa8a6f6SFlorian Hahn TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}; 15031aa8a6f6SFlorian Hahn if (RHS->isLiveIn()) 15041aa8a6f6SFlorian Hahn RHSInfo = Ctx.TTI.getOperandInfo(RHS->getLiveInIRValue()); 15051aa8a6f6SFlorian Hahn 15061aa8a6f6SFlorian Hahn if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue && 150725610048SFlorian Hahn getOperand(1)->isDefinedOutsideLoopRegions()) 15081aa8a6f6SFlorian Hahn RHSInfo.Kind = TargetTransformInfo::OK_UniformValue; 15099ab5474eSBenjamin Maxwell Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); 15101aa8a6f6SFlorian Hahn Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue()); 15111aa8a6f6SFlorian Hahn 15121aa8a6f6SFlorian Hahn SmallVector<const Value *, 4> Operands; 15131aa8a6f6SFlorian Hahn if (CtxI) 15141aa8a6f6SFlorian Hahn Operands.append(CtxI->value_op_begin(), CtxI->value_op_end()); 15151aa8a6f6SFlorian Hahn return Ctx.TTI.getArithmeticInstrCost( 1516edf3a55bSJohn Brawn Opcode, VectorTy, Ctx.CostKind, 15171aa8a6f6SFlorian Hahn {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}, 15181aa8a6f6SFlorian Hahn RHSInfo, Operands, CtxI, &Ctx.TLI); 15191aa8a6f6SFlorian Hahn } 15201aa8a6f6SFlorian Hahn case Instruction::Freeze: { 15211aa8a6f6SFlorian Hahn // This opcode is unknown. Assume that it is the same as 'mul'. 15229ab5474eSBenjamin Maxwell Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); 1523edf3a55bSJohn Brawn return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, 1524edf3a55bSJohn Brawn Ctx.CostKind); 15251aa8a6f6SFlorian Hahn } 15261aa8a6f6SFlorian Hahn case Instruction::ICmp: 15271aa8a6f6SFlorian Hahn case Instruction::FCmp: { 15281aa8a6f6SFlorian Hahn Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue()); 15299ab5474eSBenjamin Maxwell Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF); 15301aa8a6f6SFlorian Hahn return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(), 1531edf3a55bSJohn Brawn Ctx.CostKind, 1532d2885743SPhilip Reames {TTI::OK_AnyValue, TTI::OP_None}, 1533d2885743SPhilip Reames {TTI::OK_AnyValue, TTI::OP_None}, CtxI); 15341aa8a6f6SFlorian Hahn } 15351aa8a6f6SFlorian Hahn default: 15361aa8a6f6SFlorian Hahn llvm_unreachable("Unsupported opcode for instruction"); 15371aa8a6f6SFlorian Hahn } 15381aa8a6f6SFlorian Hahn } 15391aa8a6f6SFlorian Hahn 154000e40c9bSKolya Panchenko void VPWidenEVLRecipe::execute(VPTransformState &State) { 154100e40c9bSKolya Panchenko unsigned Opcode = getOpcode(); 154200e40c9bSKolya Panchenko // TODO: Support other opcodes 154300e40c9bSKolya Panchenko if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode)) 154400e40c9bSKolya Panchenko llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute"); 154500e40c9bSKolya Panchenko 154600e40c9bSKolya Panchenko State.setDebugLocFrom(getDebugLoc()); 154700e40c9bSKolya Panchenko 154857f5d8f2SFlorian Hahn assert(State.get(getOperand(0))->getType()->isVectorTy() && 154900e40c9bSKolya Panchenko "VPWidenEVLRecipe should not be used for scalars"); 155000e40c9bSKolya Panchenko 155100e40c9bSKolya Panchenko VPValue *EVL = getEVL(); 155257f5d8f2SFlorian Hahn Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true); 155300e40c9bSKolya Panchenko IRBuilderBase &BuilderIR = State.Builder; 155400e40c9bSKolya Panchenko VectorBuilder Builder(BuilderIR); 155500e40c9bSKolya Panchenko Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue()); 155600e40c9bSKolya Panchenko 155700e40c9bSKolya Panchenko SmallVector<Value *, 4> Ops; 155800e40c9bSKolya Panchenko for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) { 155900e40c9bSKolya Panchenko VPValue *VPOp = getOperand(I); 156057f5d8f2SFlorian Hahn Ops.push_back(State.get(VPOp)); 156100e40c9bSKolya Panchenko } 156200e40c9bSKolya Panchenko 156300e40c9bSKolya Panchenko Builder.setMask(Mask).setEVL(EVLArg); 156400e40c9bSKolya Panchenko Value *VPInst = 156500e40c9bSKolya Panchenko Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op"); 156600e40c9bSKolya Panchenko // Currently vp-intrinsics only accept FMF flags. 156700e40c9bSKolya Panchenko // TODO: Enable other flags when support is added. 156800e40c9bSKolya Panchenko if (isa<FPMathOperator>(VPInst)) 156900e40c9bSKolya Panchenko setFlags(cast<Instruction>(VPInst)); 157000e40c9bSKolya Panchenko 157157f5d8f2SFlorian Hahn State.set(this, VPInst); 157200e40c9bSKolya Panchenko State.addMetadata(VPInst, 157300e40c9bSKolya Panchenko dyn_cast_or_null<Instruction>(getUnderlyingValue())); 157400e40c9bSKolya Panchenko } 157500e40c9bSKolya Panchenko 15760c27b388SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 157703975b7fSFlorian Hahn void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, 157803975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 157903975b7fSFlorian Hahn O << Indent << "WIDEN "; 158003975b7fSFlorian Hahn printAsOperand(O, SlotTracker); 1581785e7063SFlorian Hahn O << " = " << Instruction::getOpcodeName(Opcode); 1582299f0ff6SFlorian Hahn printFlags(O); 158303975b7fSFlorian Hahn printOperands(O, SlotTracker); 158403975b7fSFlorian Hahn } 158500e40c9bSKolya Panchenko 158600e40c9bSKolya Panchenko void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent, 158700e40c9bSKolya Panchenko VPSlotTracker &SlotTracker) const { 1588a2994b29SLiqinWeng O << Indent << "WIDEN "; 158900e40c9bSKolya Panchenko printAsOperand(O, SlotTracker); 1590a2994b29SLiqinWeng O << " = vp." << Instruction::getOpcodeName(getOpcode()); 159100e40c9bSKolya Panchenko printFlags(O); 159200e40c9bSKolya Panchenko printOperands(O, SlotTracker); 159300e40c9bSKolya Panchenko } 1594e3afe0b8SFlorian Hahn #endif 1595e3afe0b8SFlorian Hahn 1596e3afe0b8SFlorian Hahn void VPWidenCastRecipe::execute(VPTransformState &State) { 1597165e24aaSFlorian Hahn State.setDebugLocFrom(getDebugLoc()); 1598e3afe0b8SFlorian Hahn auto &Builder = State.Builder; 1599e3afe0b8SFlorian Hahn /// Vectorize casts. 1600e3afe0b8SFlorian Hahn assert(State.VF.isVector() && "Not vectorizing?"); 1601e3afe0b8SFlorian Hahn Type *DestTy = VectorType::get(getResultType(), State.VF); 160270535f5eSFlorian Hahn VPValue *Op = getOperand(0); 160357f5d8f2SFlorian Hahn Value *A = State.get(Op); 1604e3afe0b8SFlorian Hahn Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy); 160557f5d8f2SFlorian Hahn State.set(this, Cast); 1606165e24aaSFlorian Hahn State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue())); 16078af5ae06SNoah Goldstein if (auto *CastOp = dyn_cast<Instruction>(Cast)) 16088af5ae06SNoah Goldstein setFlags(CastOp); 1609e3afe0b8SFlorian Hahn } 1610e3afe0b8SFlorian Hahn 1611b3edc764SElvis Wang InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF, 1612b3edc764SElvis Wang VPCostContext &Ctx) const { 1613e724226dSFlorian Hahn // TODO: In some cases, VPWidenCastRecipes are created but not considered in 1614e724226dSFlorian Hahn // the legacy cost model, including truncates/extends when evaluating a 1615e724226dSFlorian Hahn // reduction in a smaller type. 1616e724226dSFlorian Hahn if (!getUnderlyingValue()) 1617e724226dSFlorian Hahn return 0; 1618b3edc764SElvis Wang // Computes the CastContextHint from a recipes that may access memory. 1619b3edc764SElvis Wang auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint { 1620b3edc764SElvis Wang if (VF.isScalar()) 1621b3edc764SElvis Wang return TTI::CastContextHint::Normal; 1622b3edc764SElvis Wang if (isa<VPInterleaveRecipe>(R)) 1623b3edc764SElvis Wang return TTI::CastContextHint::Interleave; 1624b3edc764SElvis Wang if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R)) 1625b3edc764SElvis Wang return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked 1626b3edc764SElvis Wang : TTI::CastContextHint::Normal; 1627b3edc764SElvis Wang const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R); 1628b3edc764SElvis Wang if (WidenMemoryRecipe == nullptr) 1629b3edc764SElvis Wang return TTI::CastContextHint::None; 1630b3edc764SElvis Wang if (!WidenMemoryRecipe->isConsecutive()) 1631b3edc764SElvis Wang return TTI::CastContextHint::GatherScatter; 1632b3edc764SElvis Wang if (WidenMemoryRecipe->isReverse()) 1633b3edc764SElvis Wang return TTI::CastContextHint::Reversed; 1634b3edc764SElvis Wang if (WidenMemoryRecipe->isMasked()) 1635b3edc764SElvis Wang return TTI::CastContextHint::Masked; 1636b3edc764SElvis Wang return TTI::CastContextHint::Normal; 1637b3edc764SElvis Wang }; 1638b3edc764SElvis Wang 1639b3edc764SElvis Wang VPValue *Operand = getOperand(0); 1640b3edc764SElvis Wang TTI::CastContextHint CCH = TTI::CastContextHint::None; 1641b3edc764SElvis Wang // For Trunc/FPTrunc, get the context from the only user. 1642b3edc764SElvis Wang if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) && 1643b3edc764SElvis Wang !hasMoreThanOneUniqueUser() && getNumUsers() > 0) { 1644b3edc764SElvis Wang if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin())) 1645b3edc764SElvis Wang CCH = ComputeCCH(StoreRecipe); 1646b3edc764SElvis Wang } 1647b3edc764SElvis Wang // For Z/Sext, get the context from the operand. 1648b3edc764SElvis Wang else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt || 1649b3edc764SElvis Wang Opcode == Instruction::FPExt) { 1650b3edc764SElvis Wang if (Operand->isLiveIn()) 1651b3edc764SElvis Wang CCH = TTI::CastContextHint::Normal; 1652b3edc764SElvis Wang else if (Operand->getDefiningRecipe()) 1653b3edc764SElvis Wang CCH = ComputeCCH(Operand->getDefiningRecipe()); 1654b3edc764SElvis Wang } 1655b3edc764SElvis Wang 1656b3edc764SElvis Wang auto *SrcTy = 16579ab5474eSBenjamin Maxwell cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(Operand), VF)); 16589ab5474eSBenjamin Maxwell auto *DestTy = cast<VectorType>(toVectorTy(getResultType(), VF)); 1659b3edc764SElvis Wang // Arm TTI will use the underlying instruction to determine the cost. 1660b3edc764SElvis Wang return Ctx.TTI.getCastInstrCost( 1661edf3a55bSJohn Brawn Opcode, DestTy, SrcTy, CCH, Ctx.CostKind, 1662b3edc764SElvis Wang dyn_cast_if_present<Instruction>(getUnderlyingValue())); 1663b3edc764SElvis Wang } 1664b3edc764SElvis Wang 1665e3afe0b8SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1666e3afe0b8SFlorian Hahn void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent, 1667e3afe0b8SFlorian Hahn VPSlotTracker &SlotTracker) const { 1668e3afe0b8SFlorian Hahn O << Indent << "WIDEN-CAST "; 1669e3afe0b8SFlorian Hahn printAsOperand(O, SlotTracker); 16702d038caeSFlorian Hahn O << " = " << Instruction::getOpcodeName(Opcode); 1671633fe601SFlorian Hahn printFlags(O); 1672e3afe0b8SFlorian Hahn printOperands(O, SlotTracker); 1673e3afe0b8SFlorian Hahn O << " to " << *getResultType(); 1674e3afe0b8SFlorian Hahn } 167556f5738dSFlorian Hahn #endif 167603975b7fSFlorian Hahn 1677680901edSFlorian Hahn InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF, 1678680901edSFlorian Hahn VPCostContext &Ctx) const { 1679edf3a55bSJohn Brawn return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind); 1680680901edSFlorian Hahn } 1681680901edSFlorian Hahn 168256f5738dSFlorian Hahn /// This function adds 16834746395bSLuke Lau /// (0 * Step, 1 * Step, 2 * Step, ...) 16844746395bSLuke Lau /// to each vector element of Val. 168556f5738dSFlorian Hahn /// \p Opcode is relevant for FP induction variable. 16864746395bSLuke Lau static Value *getStepVector(Value *Val, Value *Step, 168756f5738dSFlorian Hahn Instruction::BinaryOps BinOp, ElementCount VF, 168856f5738dSFlorian Hahn IRBuilderBase &Builder) { 168956f5738dSFlorian Hahn assert(VF.isVector() && "only vector VFs are supported"); 169056f5738dSFlorian Hahn 169156f5738dSFlorian Hahn // Create and check the types. 169256f5738dSFlorian Hahn auto *ValVTy = cast<VectorType>(Val->getType()); 169356f5738dSFlorian Hahn ElementCount VLen = ValVTy->getElementCount(); 169456f5738dSFlorian Hahn 169556f5738dSFlorian Hahn Type *STy = Val->getType()->getScalarType(); 169656f5738dSFlorian Hahn assert((STy->isIntegerTy() || STy->isFloatingPointTy()) && 169756f5738dSFlorian Hahn "Induction Step must be an integer or FP"); 169856f5738dSFlorian Hahn assert(Step->getType() == STy && "Step has wrong type"); 169956f5738dSFlorian Hahn 170056f5738dSFlorian Hahn SmallVector<Constant *, 8> Indices; 170156f5738dSFlorian Hahn 170256f5738dSFlorian Hahn // Create a vector of consecutive numbers from zero to VF. 170356f5738dSFlorian Hahn VectorType *InitVecValVTy = ValVTy; 170456f5738dSFlorian Hahn if (STy->isFloatingPointTy()) { 170556f5738dSFlorian Hahn Type *InitVecValSTy = 170656f5738dSFlorian Hahn IntegerType::get(STy->getContext(), STy->getScalarSizeInBits()); 170756f5738dSFlorian Hahn InitVecValVTy = VectorType::get(InitVecValSTy, VLen); 170856f5738dSFlorian Hahn } 170956f5738dSFlorian Hahn Value *InitVec = Builder.CreateStepVector(InitVecValVTy); 171056f5738dSFlorian Hahn 171156f5738dSFlorian Hahn if (STy->isIntegerTy()) { 171256f5738dSFlorian Hahn Step = Builder.CreateVectorSplat(VLen, Step); 171356f5738dSFlorian Hahn assert(Step->getType() == Val->getType() && "Invalid step vec"); 171456f5738dSFlorian Hahn // FIXME: The newly created binary instructions should contain nsw/nuw 171556f5738dSFlorian Hahn // flags, which can be found from the original scalar operations. 171656f5738dSFlorian Hahn Step = Builder.CreateMul(InitVec, Step); 171756f5738dSFlorian Hahn return Builder.CreateAdd(Val, Step, "induction"); 171856f5738dSFlorian Hahn } 171956f5738dSFlorian Hahn 172056f5738dSFlorian Hahn // Floating point induction. 172156f5738dSFlorian Hahn assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) && 172256f5738dSFlorian Hahn "Binary Opcode should be specified for FP induction"); 172356f5738dSFlorian Hahn InitVec = Builder.CreateUIToFP(InitVec, ValVTy); 172456f5738dSFlorian Hahn 172556f5738dSFlorian Hahn Step = Builder.CreateVectorSplat(VLen, Step); 172656f5738dSFlorian Hahn Value *MulOp = Builder.CreateFMul(InitVec, Step); 172756f5738dSFlorian Hahn return Builder.CreateBinOp(BinOp, Val, MulOp, "induction"); 172856f5738dSFlorian Hahn } 172956f5738dSFlorian Hahn 173056f5738dSFlorian Hahn /// A helper function that returns an integer or floating-point constant with 173156f5738dSFlorian Hahn /// value C. 173256f5738dSFlorian Hahn static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) { 173356f5738dSFlorian Hahn return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C) 173456f5738dSFlorian Hahn : ConstantFP::get(Ty, C); 173556f5738dSFlorian Hahn } 173656f5738dSFlorian Hahn 173756f5738dSFlorian Hahn void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { 1738aae7ac66SFlorian Hahn assert(!State.Lane && "Int or FP induction being replicated."); 173956f5738dSFlorian Hahn 174056f5738dSFlorian Hahn Value *Start = getStartValue()->getLiveInIRValue(); 174156f5738dSFlorian Hahn const InductionDescriptor &ID = getInductionDescriptor(); 174256f5738dSFlorian Hahn TruncInst *Trunc = getTruncInst(); 174356f5738dSFlorian Hahn IRBuilderBase &Builder = State.Builder; 174495e509a9SFlorian Hahn assert(getPHINode()->getType() == ID.getStartValue()->getType() && 174595e509a9SFlorian Hahn "Types must match"); 174656f5738dSFlorian Hahn assert(State.VF.isVector() && "must have vector VF"); 174756f5738dSFlorian Hahn 174856f5738dSFlorian Hahn // The value from the original loop to which we are mapping the new induction 174956f5738dSFlorian Hahn // variable. 175095e509a9SFlorian Hahn Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode(); 175156f5738dSFlorian Hahn 175256f5738dSFlorian Hahn // Fast-math-flags propagate from the original induction instruction. 175356f5738dSFlorian Hahn IRBuilder<>::FastMathFlagGuard FMFG(Builder); 175456f5738dSFlorian Hahn if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp())) 175556f5738dSFlorian Hahn Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags()); 175656f5738dSFlorian Hahn 175756f5738dSFlorian Hahn // Now do the actual transformations, and start with fetching the step value. 1758aae7ac66SFlorian Hahn Value *Step = State.get(getStepValue(), VPLane(0)); 175956f5738dSFlorian Hahn 176056f5738dSFlorian Hahn assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) && 176156f5738dSFlorian Hahn "Expected either an induction phi-node or a truncate of it!"); 176256f5738dSFlorian Hahn 176356f5738dSFlorian Hahn // Construct the initial value of the vector IV in the vector loop preheader 176456f5738dSFlorian Hahn auto CurrIP = Builder.saveIP(); 176556f5738dSFlorian Hahn BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 176656f5738dSFlorian Hahn Builder.SetInsertPoint(VectorPH->getTerminator()); 176756f5738dSFlorian Hahn if (isa<TruncInst>(EntryVal)) { 176856f5738dSFlorian Hahn assert(Start->getType()->isIntegerTy() && 176956f5738dSFlorian Hahn "Truncation requires an integer type"); 177056f5738dSFlorian Hahn auto *TruncType = cast<IntegerType>(EntryVal->getType()); 177156f5738dSFlorian Hahn Step = Builder.CreateTrunc(Step, TruncType); 177256f5738dSFlorian Hahn Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType); 177356f5738dSFlorian Hahn } 177456f5738dSFlorian Hahn 177556f5738dSFlorian Hahn Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start); 17764746395bSLuke Lau Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(), 17774746395bSLuke Lau State.VF, State.Builder); 177856f5738dSFlorian Hahn 177956f5738dSFlorian Hahn // We create vector phi nodes for both integer and floating-point induction 178056f5738dSFlorian Hahn // variables. Here, we determine the kind of arithmetic we will perform. 178156f5738dSFlorian Hahn Instruction::BinaryOps AddOp; 178256f5738dSFlorian Hahn Instruction::BinaryOps MulOp; 178356f5738dSFlorian Hahn if (Step->getType()->isIntegerTy()) { 178456f5738dSFlorian Hahn AddOp = Instruction::Add; 178556f5738dSFlorian Hahn MulOp = Instruction::Mul; 178656f5738dSFlorian Hahn } else { 178756f5738dSFlorian Hahn AddOp = ID.getInductionOpcode(); 178856f5738dSFlorian Hahn MulOp = Instruction::FMul; 178956f5738dSFlorian Hahn } 179056f5738dSFlorian Hahn 17918ec40675SFlorian Hahn Value *SplatVF; 17928ec40675SFlorian Hahn if (VPValue *SplatVFOperand = getSplatVFValue()) { 17938ec40675SFlorian Hahn // The recipe has been unrolled. In that case, fetch the splat value for the 17948ec40675SFlorian Hahn // induction increment. 179557f5d8f2SFlorian Hahn SplatVF = State.get(SplatVFOperand); 17968ec40675SFlorian Hahn } else { 179756f5738dSFlorian Hahn // Multiply the vectorization factor by the step using integer or 179856f5738dSFlorian Hahn // floating-point arithmetic as appropriate. 179956f5738dSFlorian Hahn Type *StepType = Step->getType(); 1800aae7ac66SFlorian Hahn Value *RuntimeVF = State.get(getVFValue(), VPLane(0)); 180156f5738dSFlorian Hahn if (Step->getType()->isFloatingPointTy()) 1802a794ee45SFlorian Hahn RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType); 180356f5738dSFlorian Hahn else 1804a794ee45SFlorian Hahn RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType); 180556f5738dSFlorian Hahn Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF); 180656f5738dSFlorian Hahn 180756f5738dSFlorian Hahn // Create a vector splat to use in the induction update. 1808d9c26957SLuke Lau SplatVF = Builder.CreateVectorSplat(State.VF, Mul); 18098ec40675SFlorian Hahn } 18108ec40675SFlorian Hahn 181156f5738dSFlorian Hahn Builder.restoreIP(CurrIP); 181256f5738dSFlorian Hahn 181356f5738dSFlorian Hahn // We may need to add the step a number of times, depending on the unroll 181456f5738dSFlorian Hahn // factor. The last of those goes into the PHI. 18156942c64eSJeremy Morse PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind"); 18166942c64eSJeremy Morse VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt()); 1817734a204fSFlorian Hahn VecInd->setDebugLoc(getDebugLoc()); 181857f5d8f2SFlorian Hahn State.set(this, VecInd); 181956f5738dSFlorian Hahn 1820040bb371SFlorian Hahn Instruction *LastInduction = cast<Instruction>( 1821040bb371SFlorian Hahn Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next")); 182256f5738dSFlorian Hahn if (isa<TruncInst>(EntryVal)) 182356f5738dSFlorian Hahn State.addMetadata(LastInduction, EntryVal); 1824734a204fSFlorian Hahn LastInduction->setDebugLoc(getDebugLoc()); 182556f5738dSFlorian Hahn 182656f5738dSFlorian Hahn VecInd->addIncoming(SteppedStart, VectorPH); 182756f5738dSFlorian Hahn // Add induction update using an incorrect block temporarily. The phi node 182856f5738dSFlorian Hahn // will be fixed after VPlan execution. Note that at this point the latch 182956f5738dSFlorian Hahn // block cannot be used, as it does not exist yet. 183056f5738dSFlorian Hahn // TODO: Model increment value in VPlan, by turning the recipe into a 183156f5738dSFlorian Hahn // multi-def and a subclass of VPHeaderPHIRecipe. 183256f5738dSFlorian Hahn VecInd->addIncoming(LastInduction, VectorPH); 183356f5738dSFlorian Hahn } 183456f5738dSFlorian Hahn 183556f5738dSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 183603975b7fSFlorian Hahn void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, 183703975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 183843045051SFlorian Hahn O << Indent; 183943045051SFlorian Hahn printAsOperand(O, SlotTracker); 184043045051SFlorian Hahn O << " = WIDEN-INDUCTION "; 184143045051SFlorian Hahn printOperands(O, SlotTracker); 184203975b7fSFlorian Hahn 184343045051SFlorian Hahn if (auto *TI = getTruncInst()) 184443045051SFlorian Hahn O << " (truncated to " << *TI->getType() << ")"; 184503975b7fSFlorian Hahn } 184603975b7fSFlorian Hahn #endif 184703975b7fSFlorian Hahn 184803975b7fSFlorian Hahn bool VPWidenIntOrFpInductionRecipe::isCanonical() const { 18492db03152SFlorian Hahn // The step may be defined by a recipe in the preheader (e.g. if it requires 18502db03152SFlorian Hahn // SCEV expansion), but for the canonical induction the step is required to be 18512db03152SFlorian Hahn // 1, which is represented as live-in. 18522db03152SFlorian Hahn if (getStepValue()->getDefiningRecipe()) 18532db03152SFlorian Hahn return false; 18542db03152SFlorian Hahn auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue()); 185503975b7fSFlorian Hahn auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue()); 1856a48ebb82SFlorian Hahn auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin()); 1857a48ebb82SFlorian Hahn return StartC && StartC->isZero() && StepC && StepC->isOne() && 1858a48ebb82SFlorian Hahn getScalarType() == CanIV->getScalarType(); 185903975b7fSFlorian Hahn } 186003975b7fSFlorian Hahn 18610c5df7cdSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 18620c5df7cdSFlorian Hahn void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent, 18630c5df7cdSFlorian Hahn VPSlotTracker &SlotTracker) const { 18640c5df7cdSFlorian Hahn O << Indent; 18650c5df7cdSFlorian Hahn printAsOperand(O, SlotTracker); 18662f0d3269SShao-Ce SUN O << " = DERIVED-IV "; 18670c5df7cdSFlorian Hahn getStartValue()->printAsOperand(O, SlotTracker); 18680c5df7cdSFlorian Hahn O << " + "; 1869413a66f3SAlexey Bataev getOperand(1)->printAsOperand(O, SlotTracker); 18700c5df7cdSFlorian Hahn O << " * "; 18710c5df7cdSFlorian Hahn getStepValue()->printAsOperand(O, SlotTracker); 1872bf15f1e4SFlorian Hahn } 18730c5df7cdSFlorian Hahn #endif 187403975b7fSFlorian Hahn 187556f5738dSFlorian Hahn void VPScalarIVStepsRecipe::execute(VPTransformState &State) { 187656f5738dSFlorian Hahn // Fast-math-flags propagate from the original induction instruction. 187756f5738dSFlorian Hahn IRBuilder<>::FastMathFlagGuard FMFG(State.Builder); 18783e2d564cSFlorian Hahn if (hasFastMathFlags()) 18793e2d564cSFlorian Hahn State.Builder.setFastMathFlags(getFastMathFlags()); 188056f5738dSFlorian Hahn 188156f5738dSFlorian Hahn /// Compute scalar induction steps. \p ScalarIV is the scalar induction 188256f5738dSFlorian Hahn /// variable on which to base the steps, \p Step is the size of the step. 188356f5738dSFlorian Hahn 1884aae7ac66SFlorian Hahn Value *BaseIV = State.get(getOperand(0), VPLane(0)); 1885aae7ac66SFlorian Hahn Value *Step = State.get(getStepValue(), VPLane(0)); 188656f5738dSFlorian Hahn IRBuilderBase &Builder = State.Builder; 188756f5738dSFlorian Hahn 188856f5738dSFlorian Hahn // Ensure step has the same type as that of scalar IV. 188956f5738dSFlorian Hahn Type *BaseIVTy = BaseIV->getType()->getScalarType(); 18900ab539fdSFlorian Hahn assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!"); 189156f5738dSFlorian Hahn 189256f5738dSFlorian Hahn // We build scalar steps for both integer and floating-point induction 189356f5738dSFlorian Hahn // variables. Here, we determine the kind of arithmetic we will perform. 189456f5738dSFlorian Hahn Instruction::BinaryOps AddOp; 189556f5738dSFlorian Hahn Instruction::BinaryOps MulOp; 189656f5738dSFlorian Hahn if (BaseIVTy->isIntegerTy()) { 189756f5738dSFlorian Hahn AddOp = Instruction::Add; 189856f5738dSFlorian Hahn MulOp = Instruction::Mul; 189956f5738dSFlorian Hahn } else { 19003e2d564cSFlorian Hahn AddOp = InductionOpcode; 190156f5738dSFlorian Hahn MulOp = Instruction::FMul; 190256f5738dSFlorian Hahn } 190356f5738dSFlorian Hahn 190456f5738dSFlorian Hahn // Determine the number of scalars we need to generate for each unroll 190556f5738dSFlorian Hahn // iteration. 190656f5738dSFlorian Hahn bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this); 190756f5738dSFlorian Hahn // Compute the scalar steps and save the results in State. 190856f5738dSFlorian Hahn Type *IntStepTy = 190956f5738dSFlorian Hahn IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits()); 191056f5738dSFlorian Hahn Type *VecIVTy = nullptr; 191156f5738dSFlorian Hahn Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr; 191256f5738dSFlorian Hahn if (!FirstLaneOnly && State.VF.isScalable()) { 191356f5738dSFlorian Hahn VecIVTy = VectorType::get(BaseIVTy, State.VF); 191456f5738dSFlorian Hahn UnitStepVec = 191556f5738dSFlorian Hahn Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF)); 191656f5738dSFlorian Hahn SplatStep = Builder.CreateVectorSplat(State.VF, Step); 191756f5738dSFlorian Hahn SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV); 191856f5738dSFlorian Hahn } 191956f5738dSFlorian Hahn 192056f5738dSFlorian Hahn unsigned StartLane = 0; 192156f5738dSFlorian Hahn unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue(); 1922aae7ac66SFlorian Hahn if (State.Lane) { 1923aae7ac66SFlorian Hahn StartLane = State.Lane->getKnownLane(); 192456f5738dSFlorian Hahn EndLane = StartLane + 1; 192556f5738dSFlorian Hahn } 19268ec40675SFlorian Hahn Value *StartIdx0 = 19278ec40675SFlorian Hahn createStepForVF(Builder, IntStepTy, State.VF, getUnrollPart(*this)); 192856f5738dSFlorian Hahn 192956f5738dSFlorian Hahn if (!FirstLaneOnly && State.VF.isScalable()) { 193056f5738dSFlorian Hahn auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0); 193156f5738dSFlorian Hahn auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec); 193256f5738dSFlorian Hahn if (BaseIVTy->isFloatingPointTy()) 193356f5738dSFlorian Hahn InitVec = Builder.CreateSIToFP(InitVec, VecIVTy); 193456f5738dSFlorian Hahn auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep); 193556f5738dSFlorian Hahn auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul); 193657f5d8f2SFlorian Hahn State.set(this, Add); 193756f5738dSFlorian Hahn // It's useful to record the lane values too for the known minimum number 193856f5738dSFlorian Hahn // of elements so we do those below. This improves the code quality when 193956f5738dSFlorian Hahn // trying to extract the first element, for example. 194056f5738dSFlorian Hahn } 194156f5738dSFlorian Hahn 194256f5738dSFlorian Hahn if (BaseIVTy->isFloatingPointTy()) 194356f5738dSFlorian Hahn StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy); 194456f5738dSFlorian Hahn 194556f5738dSFlorian Hahn for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) { 194656f5738dSFlorian Hahn Value *StartIdx = Builder.CreateBinOp( 194756f5738dSFlorian Hahn AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane)); 194856f5738dSFlorian Hahn // The step returned by `createStepForVF` is a runtime-evaluated value 194956f5738dSFlorian Hahn // when VF is scalable. Otherwise, it should be folded into a Constant. 195056f5738dSFlorian Hahn assert((State.VF.isScalable() || isa<Constant>(StartIdx)) && 195156f5738dSFlorian Hahn "Expected StartIdx to be folded to a constant when VF is not " 195256f5738dSFlorian Hahn "scalable"); 195356f5738dSFlorian Hahn auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step); 195456f5738dSFlorian Hahn auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul); 1955aae7ac66SFlorian Hahn State.set(this, Add, VPLane(Lane)); 195656f5738dSFlorian Hahn } 195756f5738dSFlorian Hahn } 195856f5738dSFlorian Hahn 195903975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 196003975b7fSFlorian Hahn void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent, 196103975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 196203975b7fSFlorian Hahn O << Indent; 196303975b7fSFlorian Hahn printAsOperand(O, SlotTracker); 19641d1cba44SFlorian Hahn O << " = SCALAR-STEPS "; 196503975b7fSFlorian Hahn printOperands(O, SlotTracker); 196603975b7fSFlorian Hahn } 19676a4bc452SFlorian Hahn #endif 196803975b7fSFlorian Hahn 19696a4bc452SFlorian Hahn void VPWidenGEPRecipe::execute(VPTransformState &State) { 197001fa764cSFlorian Hahn assert(State.VF.isVector() && "not widening"); 19716a4bc452SFlorian Hahn auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr()); 19726a4bc452SFlorian Hahn // Construct a vector GEP by widening the operands of the scalar GEP as 19736a4bc452SFlorian Hahn // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP 19746a4bc452SFlorian Hahn // results in a vector of pointers when at least one operand of the GEP 19756a4bc452SFlorian Hahn // is vector-typed. Thus, to keep the representation compact, we only use 19766a4bc452SFlorian Hahn // vector-typed operands for loop-varying values. 19776a4bc452SFlorian Hahn 197801fa764cSFlorian Hahn if (areAllOperandsInvariant()) { 19796a4bc452SFlorian Hahn // If we are vectorizing, but the GEP has only loop-invariant operands, 19806a4bc452SFlorian Hahn // the GEP we build (by only using vector-typed operands for 19816a4bc452SFlorian Hahn // loop-varying values) would be a scalar pointer. Thus, to ensure we 19826a4bc452SFlorian Hahn // produce a vector of pointers, we need to either arbitrarily pick an 19836a4bc452SFlorian Hahn // operand to broadcast, or broadcast a clone of the original GEP. 19846a4bc452SFlorian Hahn // Here, we broadcast a clone of the original. 19856a4bc452SFlorian Hahn // 19866a4bc452SFlorian Hahn // TODO: If at some point we decide to scalarize instructions having 19876a4bc452SFlorian Hahn // loop-invariant operands, this special case will no longer be 19886a4bc452SFlorian Hahn // required. We would add the scalarization decision to 19896a4bc452SFlorian Hahn // collectLoopScalars() and teach getVectorValue() to broadcast 19906a4bc452SFlorian Hahn // the lane-zero scalar value. 19910a246a0cSFlorian Hahn SmallVector<Value *> Ops; 19920a246a0cSFlorian Hahn for (unsigned I = 0, E = getNumOperands(); I != E; I++) 1993aae7ac66SFlorian Hahn Ops.push_back(State.get(getOperand(I), VPLane(0))); 19940a246a0cSFlorian Hahn 199511571874SNikita Popov auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0], 199611571874SNikita Popov ArrayRef(Ops).drop_front(), "", 199711571874SNikita Popov getGEPNoWrapFlags()); 199806c3a7d2SFlorian Hahn Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP); 199957f5d8f2SFlorian Hahn State.set(this, Splat); 200006c3a7d2SFlorian Hahn State.addMetadata(Splat, GEP); 20016a4bc452SFlorian Hahn } else { 20026a4bc452SFlorian Hahn // If the GEP has at least one loop-varying operand, we are sure to 200306c3a7d2SFlorian Hahn // produce a vector of pointers unless VF is scalar. 20046a4bc452SFlorian Hahn // The pointer operand of the new GEP. If it's loop-invariant, we 20056a4bc452SFlorian Hahn // won't broadcast it. 2006aae7ac66SFlorian Hahn auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0)) 200757f5d8f2SFlorian Hahn : State.get(getOperand(0)); 20086a4bc452SFlorian Hahn 20096a4bc452SFlorian Hahn // Collect all the indices for the new GEP. If any index is 20106a4bc452SFlorian Hahn // loop-invariant, we won't broadcast it. 20116a4bc452SFlorian Hahn SmallVector<Value *, 4> Indices; 20126a4bc452SFlorian Hahn for (unsigned I = 1, E = getNumOperands(); I < E; I++) { 20136a4bc452SFlorian Hahn VPValue *Operand = getOperand(I); 2014a8adb38aSFlorian Hahn if (isIndexLoopInvariant(I - 1)) 2015aae7ac66SFlorian Hahn Indices.push_back(State.get(Operand, VPLane(0))); 20166a4bc452SFlorian Hahn else 201757f5d8f2SFlorian Hahn Indices.push_back(State.get(Operand)); 20186a4bc452SFlorian Hahn } 20196a4bc452SFlorian Hahn 20206a4bc452SFlorian Hahn // Create the new GEP. Note that this GEP may be a scalar if VF == 1, 20216a4bc452SFlorian Hahn // but it should be a vector, otherwise. 20226a4bc452SFlorian Hahn auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr, 202311571874SNikita Popov Indices, "", getGEPNoWrapFlags()); 20246a4bc452SFlorian Hahn assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && 20256a4bc452SFlorian Hahn "NewGEP is not a pointer vector"); 202657f5d8f2SFlorian Hahn State.set(this, NewGEP); 20276a4bc452SFlorian Hahn State.addMetadata(NewGEP, GEP); 20286a4bc452SFlorian Hahn } 20296a4bc452SFlorian Hahn } 20306a4bc452SFlorian Hahn 20316a4bc452SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 203203975b7fSFlorian Hahn void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, 203303975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 203403975b7fSFlorian Hahn O << Indent << "WIDEN-GEP "; 2035a8adb38aSFlorian Hahn O << (isPointerLoopInvariant() ? "Inv" : "Var"); 2036a8adb38aSFlorian Hahn for (size_t I = 0; I < getNumOperands() - 1; ++I) 2037a8adb38aSFlorian Hahn O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]"; 203803975b7fSFlorian Hahn 203903975b7fSFlorian Hahn O << " "; 204003975b7fSFlorian Hahn printAsOperand(O, SlotTracker); 204103975b7fSFlorian Hahn O << " = getelementptr"; 2042299f0ff6SFlorian Hahn printFlags(O); 204303975b7fSFlorian Hahn printOperands(O, SlotTracker); 204403975b7fSFlorian Hahn } 20455d135041SFlorian Hahn #endif 204603975b7fSFlorian Hahn 2047266ff98cSShih-Po Hung static Type *getGEPIndexTy(bool IsScalable, bool IsReverse, 2048266ff98cSShih-Po Hung unsigned CurrentPart, IRBuilderBase &Builder) { 2049f18536d6SFlorian Hahn // Use i32 for the gep index type when the value is constant, 2050f18536d6SFlorian Hahn // or query DataLayout for a more suitable index type otherwise. 205106c3a7d2SFlorian Hahn const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout(); 2052266ff98cSShih-Po Hung return IsScalable && (IsReverse || CurrentPart > 0) 2053e177dd6fSYoungsuk Kim ? DL.getIndexType(Builder.getPtrTy(0)) 2054f18536d6SFlorian Hahn : Builder.getInt32Ty(); 2055266ff98cSShih-Po Hung } 205606c3a7d2SFlorian Hahn 2057266ff98cSShih-Po Hung void VPReverseVectorPointerRecipe::execute(VPTransformState &State) { 2058266ff98cSShih-Po Hung auto &Builder = State.Builder; 2059266ff98cSShih-Po Hung State.setDebugLocFrom(getDebugLoc()); 2060266ff98cSShih-Po Hung unsigned CurrentPart = getUnrollPart(*this); 2061266ff98cSShih-Po Hung Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true, 2062266ff98cSShih-Po Hung CurrentPart, Builder); 2063266ff98cSShih-Po Hung 2064266ff98cSShih-Po Hung // The wide store needs to start at the last vector element. 2065266ff98cSShih-Po Hung Value *RunTimeVF = State.get(getVFValue(), VPLane(0)); 2066266ff98cSShih-Po Hung if (IndexTy != RunTimeVF->getType()) 2067266ff98cSShih-Po Hung RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy); 20688ec40675SFlorian Hahn // NumElt = -CurrentPart * RunTimeVF 2069f18536d6SFlorian Hahn Value *NumElt = Builder.CreateMul( 20708ec40675SFlorian Hahn ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF); 2071f18536d6SFlorian Hahn // LastLane = 1 - RunTimeVF 2072266ff98cSShih-Po Hung Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF); 2073266ff98cSShih-Po Hung Value *Ptr = State.get(getOperand(0), VPLane(0)); 207411571874SNikita Popov Value *ResultPtr = 207511571874SNikita Popov Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags()); 207611571874SNikita Popov ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", 207711571874SNikita Popov getGEPNoWrapFlags()); 2078266ff98cSShih-Po Hung 2079266ff98cSShih-Po Hung State.set(this, ResultPtr, /*IsScalar*/ true); 2080f18536d6SFlorian Hahn } 2081f18536d6SFlorian Hahn 2082266ff98cSShih-Po Hung #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2083266ff98cSShih-Po Hung void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent, 2084266ff98cSShih-Po Hung VPSlotTracker &SlotTracker) const { 2085266ff98cSShih-Po Hung O << Indent; 2086266ff98cSShih-Po Hung printAsOperand(O, SlotTracker); 2087266ff98cSShih-Po Hung O << " = reverse-vector-pointer"; 208811571874SNikita Popov printFlags(O); 2089266ff98cSShih-Po Hung printOperands(O, SlotTracker); 2090266ff98cSShih-Po Hung } 2091266ff98cSShih-Po Hung #endif 2092266ff98cSShih-Po Hung 2093266ff98cSShih-Po Hung void VPVectorPointerRecipe::execute(VPTransformState &State) { 2094266ff98cSShih-Po Hung auto &Builder = State.Builder; 2095266ff98cSShih-Po Hung State.setDebugLocFrom(getDebugLoc()); 2096266ff98cSShih-Po Hung unsigned CurrentPart = getUnrollPart(*this); 2097266ff98cSShih-Po Hung Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false, 2098266ff98cSShih-Po Hung CurrentPart, Builder); 2099266ff98cSShih-Po Hung Value *Ptr = State.get(getOperand(0), VPLane(0)); 2100266ff98cSShih-Po Hung 2101266ff98cSShih-Po Hung Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart); 210211571874SNikita Popov Value *ResultPtr = 210311571874SNikita Popov Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags()); 2104266ff98cSShih-Po Hung 210557f5d8f2SFlorian Hahn State.set(this, ResultPtr, /*IsScalar*/ true); 2106f18536d6SFlorian Hahn } 2107f18536d6SFlorian Hahn 2108f18536d6SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2109f18536d6SFlorian Hahn void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent, 2110f18536d6SFlorian Hahn VPSlotTracker &SlotTracker) const { 2111f18536d6SFlorian Hahn O << Indent; 2112f18536d6SFlorian Hahn printAsOperand(O, SlotTracker); 2113f18536d6SFlorian Hahn O << " = vector-pointer "; 2114f18536d6SFlorian Hahn 2115f18536d6SFlorian Hahn printOperands(O, SlotTracker); 2116f18536d6SFlorian Hahn } 2117f18536d6SFlorian Hahn #endif 2118f18536d6SFlorian Hahn 21195d135041SFlorian Hahn void VPBlendRecipe::execute(VPTransformState &State) { 21204f075086SPaul Walker assert(isNormalized() && "Expected blend to be normalized!"); 2121165e24aaSFlorian Hahn State.setDebugLocFrom(getDebugLoc()); 21225d135041SFlorian Hahn // We know that all PHIs in non-header blocks are converted into 21235d135041SFlorian Hahn // selects, so we don't have to worry about the insertion order and we 21245d135041SFlorian Hahn // can just use the builder. 21255d135041SFlorian Hahn // At this point we generate the predication tree. There may be 21265d135041SFlorian Hahn // duplications since this is a simple recursive scan, but future 21275d135041SFlorian Hahn // optimizations will clean it up. 21285d135041SFlorian Hahn 21295d135041SFlorian Hahn unsigned NumIncoming = getNumIncomingValues(); 21305d135041SFlorian Hahn 21315d135041SFlorian Hahn // Generate a sequence of selects of the form: 21325d135041SFlorian Hahn // SELECT(Mask3, In3, 21335d135041SFlorian Hahn // SELECT(Mask2, In2, 21345d135041SFlorian Hahn // SELECT(Mask1, In1, 21355d135041SFlorian Hahn // In0))) 21365d135041SFlorian Hahn // Note that Mask0 is never used: lanes for which no path reaches this phi and 21375d135041SFlorian Hahn // are essentially undef are taken from In0. 2138d187005cSFlorian Hahn bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); 213906c3a7d2SFlorian Hahn Value *Result = nullptr; 21405d135041SFlorian Hahn for (unsigned In = 0; In < NumIncoming; ++In) { 21415d135041SFlorian Hahn // We might have single edge PHIs (blocks) - use an identity 21425d135041SFlorian Hahn // 'select' for the first PHI operand. 214357f5d8f2SFlorian Hahn Value *In0 = State.get(getIncomingValue(In), OnlyFirstLaneUsed); 21445d135041SFlorian Hahn if (In == 0) 214506c3a7d2SFlorian Hahn Result = In0; // Initialize with the first incoming value. 21465d135041SFlorian Hahn else { 21475d135041SFlorian Hahn // Select between the current value and the previous incoming edge 21485d135041SFlorian Hahn // based on the incoming mask. 214957f5d8f2SFlorian Hahn Value *Cond = State.get(getMask(In), OnlyFirstLaneUsed); 215006c3a7d2SFlorian Hahn Result = State.Builder.CreateSelect(Cond, In0, Result, "predphi"); 21515d135041SFlorian Hahn } 21525d135041SFlorian Hahn } 215357f5d8f2SFlorian Hahn State.set(this, Result, OnlyFirstLaneUsed); 21545d135041SFlorian Hahn } 21555d135041SFlorian Hahn 215636fc291bSFlorian Hahn InstructionCost VPBlendRecipe::computeCost(ElementCount VF, 215736fc291bSFlorian Hahn VPCostContext &Ctx) const { 215836fc291bSFlorian Hahn // Handle cases where only the first lane is used the same way as the legacy 215936fc291bSFlorian Hahn // cost model. 216036fc291bSFlorian Hahn if (vputils::onlyFirstLaneUsed(this)) 2161edf3a55bSJohn Brawn return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind); 216236fc291bSFlorian Hahn 21639ab5474eSBenjamin Maxwell Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); 21649ab5474eSBenjamin Maxwell Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF); 216536fc291bSFlorian Hahn return (getNumIncomingValues() - 1) * 216636fc291bSFlorian Hahn Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy, 2167edf3a55bSJohn Brawn CmpInst::BAD_ICMP_PREDICATE, Ctx.CostKind); 216836fc291bSFlorian Hahn } 216936fc291bSFlorian Hahn 21705d135041SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 217103975b7fSFlorian Hahn void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, 217203975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 217303975b7fSFlorian Hahn O << Indent << "BLEND "; 21743fa1b254SFlorian Hahn printAsOperand(O, SlotTracker); 217503975b7fSFlorian Hahn O << " ="; 217603975b7fSFlorian Hahn if (getNumIncomingValues() == 1) { 217703975b7fSFlorian Hahn // Not a User of any mask: not really blending, this is a 217803975b7fSFlorian Hahn // single-predecessor phi. 217903975b7fSFlorian Hahn O << " "; 218003975b7fSFlorian Hahn getIncomingValue(0)->printAsOperand(O, SlotTracker); 218103975b7fSFlorian Hahn } else { 218203975b7fSFlorian Hahn for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) { 218303975b7fSFlorian Hahn O << " "; 218403975b7fSFlorian Hahn getIncomingValue(I)->printAsOperand(O, SlotTracker); 2185c8369836SFlorian Hahn if (I == 0) 2186c8369836SFlorian Hahn continue; 218703975b7fSFlorian Hahn O << "/"; 218803975b7fSFlorian Hahn getMask(I)->printAsOperand(O, SlotTracker); 218903975b7fSFlorian Hahn } 219003975b7fSFlorian Hahn } 219103975b7fSFlorian Hahn } 219215d11a4dSFlorian Hahn #endif 219303975b7fSFlorian Hahn 219415d11a4dSFlorian Hahn void VPReductionRecipe::execute(VPTransformState &State) { 2195aae7ac66SFlorian Hahn assert(!State.Lane && "Reduction being replicated."); 219657f5d8f2SFlorian Hahn Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true); 219715d11a4dSFlorian Hahn RecurKind Kind = RdxDesc.getRecurrenceKind(); 219815d11a4dSFlorian Hahn // Propagate the fast-math flags carried by the underlying instruction. 219915d11a4dSFlorian Hahn IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); 220015d11a4dSFlorian Hahn State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); 220147e1c87aSElvis Wang State.setDebugLocFrom(getDebugLoc()); 220257f5d8f2SFlorian Hahn Value *NewVecOp = State.get(getVecOp()); 220315d11a4dSFlorian Hahn if (VPValue *Cond = getCondOp()) { 220457f5d8f2SFlorian Hahn Value *NewCond = State.get(Cond, State.VF.isScalar()); 220515d11a4dSFlorian Hahn VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType()); 220615d11a4dSFlorian Hahn Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType(); 220715d11a4dSFlorian Hahn 22080b2f2537SPhilip Reames Value *Start; 22090b2f2537SPhilip Reames if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind)) 22100b2f2537SPhilip Reames Start = RdxDesc.getRecurrenceStartValue(); 22110b2f2537SPhilip Reames else 22123d9abfc9SPhilip Reames Start = llvm::getRecurrenceIdentity(Kind, ElementTy, 22130b2f2537SPhilip Reames RdxDesc.getFastMathFlags()); 22140b2f2537SPhilip Reames if (State.VF.isVector()) 221506c3a7d2SFlorian Hahn Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start); 22160b2f2537SPhilip Reames 22170b2f2537SPhilip Reames Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start); 221815d11a4dSFlorian Hahn NewVecOp = Select; 221915d11a4dSFlorian Hahn } 222015d11a4dSFlorian Hahn Value *NewRed; 222115d11a4dSFlorian Hahn Value *NextInChain; 222215d11a4dSFlorian Hahn if (IsOrdered) { 222315d11a4dSFlorian Hahn if (State.VF.isVector()) 222406c3a7d2SFlorian Hahn NewRed = 222506c3a7d2SFlorian Hahn createOrderedReduction(State.Builder, RdxDesc, NewVecOp, PrevInChain); 222615d11a4dSFlorian Hahn else 222715d11a4dSFlorian Hahn NewRed = State.Builder.CreateBinOp( 22282a0ee090SRamkumar Ramachandra (Instruction::BinaryOps)RdxDesc.getOpcode(), PrevInChain, NewVecOp); 222915d11a4dSFlorian Hahn PrevInChain = NewRed; 2230c53008deSPhilip Reames NextInChain = NewRed; 223115d11a4dSFlorian Hahn } else { 223257f5d8f2SFlorian Hahn PrevInChain = State.get(getChainOp(), /*IsScalar*/ true); 22333e8840baSPhilip Reames NewRed = createReduction(State.Builder, RdxDesc, NewVecOp); 2234c53008deSPhilip Reames if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) 223515d11a4dSFlorian Hahn NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(), 223615d11a4dSFlorian Hahn NewRed, PrevInChain); 223715d11a4dSFlorian Hahn else 223815d11a4dSFlorian Hahn NextInChain = State.Builder.CreateBinOp( 22392a0ee090SRamkumar Ramachandra (Instruction::BinaryOps)RdxDesc.getOpcode(), NewRed, PrevInChain); 2240c53008deSPhilip Reames } 224157f5d8f2SFlorian Hahn State.set(this, NextInChain, /*IsScalar*/ true); 224215d11a4dSFlorian Hahn } 224315d11a4dSFlorian Hahn 22444eb30cfbSMel Chen void VPReductionEVLRecipe::execute(VPTransformState &State) { 2245aae7ac66SFlorian Hahn assert(!State.Lane && "Reduction being replicated."); 22464eb30cfbSMel Chen 22474eb30cfbSMel Chen auto &Builder = State.Builder; 22484eb30cfbSMel Chen // Propagate the fast-math flags carried by the underlying instruction. 22494eb30cfbSMel Chen IRBuilderBase::FastMathFlagGuard FMFGuard(Builder); 22504eb30cfbSMel Chen const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor(); 22514eb30cfbSMel Chen Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); 22524eb30cfbSMel Chen 22534eb30cfbSMel Chen RecurKind Kind = RdxDesc.getRecurrenceKind(); 225457f5d8f2SFlorian Hahn Value *Prev = State.get(getChainOp(), /*IsScalar*/ true); 225557f5d8f2SFlorian Hahn Value *VecOp = State.get(getVecOp()); 2256aae7ac66SFlorian Hahn Value *EVL = State.get(getEVL(), VPLane(0)); 22574eb30cfbSMel Chen 22584eb30cfbSMel Chen VectorBuilder VBuilder(Builder); 22594eb30cfbSMel Chen VBuilder.setEVL(EVL); 22604eb30cfbSMel Chen Value *Mask; 22614eb30cfbSMel Chen // TODO: move the all-true mask generation into VectorBuilder. 22624eb30cfbSMel Chen if (VPValue *CondOp = getCondOp()) 226357f5d8f2SFlorian Hahn Mask = State.get(CondOp); 22644eb30cfbSMel Chen else 22654eb30cfbSMel Chen Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue()); 22664eb30cfbSMel Chen VBuilder.setMask(Mask); 22674eb30cfbSMel Chen 22684eb30cfbSMel Chen Value *NewRed; 22694eb30cfbSMel Chen if (isOrdered()) { 22704eb30cfbSMel Chen NewRed = createOrderedReduction(VBuilder, RdxDesc, VecOp, Prev); 22714eb30cfbSMel Chen } else { 22723e8840baSPhilip Reames NewRed = createSimpleReduction(VBuilder, VecOp, RdxDesc); 22734eb30cfbSMel Chen if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) 22744eb30cfbSMel Chen NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev); 22754eb30cfbSMel Chen else 22762a0ee090SRamkumar Ramachandra NewRed = Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(), 22772a0ee090SRamkumar Ramachandra NewRed, Prev); 22784eb30cfbSMel Chen } 227957f5d8f2SFlorian Hahn State.set(this, NewRed, /*IsScalar*/ true); 22804eb30cfbSMel Chen } 22814eb30cfbSMel Chen 22823c91a2f7SElvis Wang InstructionCost VPReductionRecipe::computeCost(ElementCount VF, 22833c91a2f7SElvis Wang VPCostContext &Ctx) const { 22843c91a2f7SElvis Wang RecurKind RdxKind = RdxDesc.getRecurrenceKind(); 22853c91a2f7SElvis Wang Type *ElementTy = Ctx.Types.inferScalarType(this); 22869ab5474eSBenjamin Maxwell auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF)); 22873c91a2f7SElvis Wang unsigned Opcode = RdxDesc.getOpcode(); 22883c91a2f7SElvis Wang 22893c91a2f7SElvis Wang // TODO: Support any-of and in-loop reductions. 22903c91a2f7SElvis Wang assert( 22913c91a2f7SElvis Wang (!RecurrenceDescriptor::isAnyOfRecurrenceKind(RdxKind) || 22923c91a2f7SElvis Wang ForceTargetInstructionCost.getNumOccurrences() > 0) && 22933c91a2f7SElvis Wang "Any-of reduction not implemented in VPlan-based cost model currently."); 22943c91a2f7SElvis Wang assert( 22953c91a2f7SElvis Wang (!cast<VPReductionPHIRecipe>(getOperand(0))->isInLoop() || 22963c91a2f7SElvis Wang ForceTargetInstructionCost.getNumOccurrences() > 0) && 22973c91a2f7SElvis Wang "In-loop reduction not implemented in VPlan-based cost model currently."); 22983c91a2f7SElvis Wang 22993c91a2f7SElvis Wang assert(ElementTy->getTypeID() == RdxDesc.getRecurrenceType()->getTypeID() && 23003c91a2f7SElvis Wang "Inferred type and recurrence type mismatch."); 23013c91a2f7SElvis Wang 23023c91a2f7SElvis Wang // Cost = Reduction cost + BinOp cost 23033c91a2f7SElvis Wang InstructionCost Cost = 2304edf3a55bSJohn Brawn Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, Ctx.CostKind); 23053c91a2f7SElvis Wang if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind)) { 23063c91a2f7SElvis Wang Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind); 23073c91a2f7SElvis Wang return Cost + Ctx.TTI.getMinMaxReductionCost( 2308edf3a55bSJohn Brawn Id, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind); 23093c91a2f7SElvis Wang } 23103c91a2f7SElvis Wang 23113c91a2f7SElvis Wang return Cost + Ctx.TTI.getArithmeticReductionCost( 2312edf3a55bSJohn Brawn Opcode, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind); 23133c91a2f7SElvis Wang } 23143c91a2f7SElvis Wang 231515d11a4dSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 231603975b7fSFlorian Hahn void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, 231703975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 231803975b7fSFlorian Hahn O << Indent << "REDUCE "; 231903975b7fSFlorian Hahn printAsOperand(O, SlotTracker); 232003975b7fSFlorian Hahn O << " = "; 232103975b7fSFlorian Hahn getChainOp()->printAsOperand(O, SlotTracker); 232203975b7fSFlorian Hahn O << " +"; 232303975b7fSFlorian Hahn if (isa<FPMathOperator>(getUnderlyingInstr())) 232403975b7fSFlorian Hahn O << getUnderlyingInstr()->getFastMathFlags(); 2325463e7cb8SMel Chen O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " ("; 232603975b7fSFlorian Hahn getVecOp()->printAsOperand(O, SlotTracker); 23274eb30cfbSMel Chen if (isConditional()) { 23284eb30cfbSMel Chen O << ", "; 23294eb30cfbSMel Chen getCondOp()->printAsOperand(O, SlotTracker); 23304eb30cfbSMel Chen } 23314eb30cfbSMel Chen O << ")"; 23324eb30cfbSMel Chen if (RdxDesc.IntermediateStore) 23334eb30cfbSMel Chen O << " (with final reduction value stored in invariant address sank " 23344eb30cfbSMel Chen "outside of loop)"; 23354eb30cfbSMel Chen } 23364eb30cfbSMel Chen 23374eb30cfbSMel Chen void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent, 23384eb30cfbSMel Chen VPSlotTracker &SlotTracker) const { 23394eb30cfbSMel Chen const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor(); 23404eb30cfbSMel Chen O << Indent << "REDUCE "; 23414eb30cfbSMel Chen printAsOperand(O, SlotTracker); 23424eb30cfbSMel Chen O << " = "; 23434eb30cfbSMel Chen getChainOp()->printAsOperand(O, SlotTracker); 23444eb30cfbSMel Chen O << " +"; 23454eb30cfbSMel Chen if (isa<FPMathOperator>(getUnderlyingInstr())) 23464eb30cfbSMel Chen O << getUnderlyingInstr()->getFastMathFlags(); 23474eb30cfbSMel Chen O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " ("; 23484eb30cfbSMel Chen getVecOp()->printAsOperand(O, SlotTracker); 23494eb30cfbSMel Chen O << ", "; 23504eb30cfbSMel Chen getEVL()->printAsOperand(O, SlotTracker); 23514eb30cfbSMel Chen if (isConditional()) { 235203975b7fSFlorian Hahn O << ", "; 235303975b7fSFlorian Hahn getCondOp()->printAsOperand(O, SlotTracker); 235403975b7fSFlorian Hahn } 235503975b7fSFlorian Hahn O << ")"; 2356463e7cb8SMel Chen if (RdxDesc.IntermediateStore) 235703975b7fSFlorian Hahn O << " (with final reduction value stored in invariant address sank " 235803975b7fSFlorian Hahn "outside of loop)"; 235903975b7fSFlorian Hahn } 2360df016a95SFlorian Hahn #endif 236103975b7fSFlorian Hahn 23629333b977SFlorian Hahn bool VPReplicateRecipe::shouldPack() const { 23639333b977SFlorian Hahn // Find if the recipe is used by a widened recipe via an intervening 23649333b977SFlorian Hahn // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector. 23659333b977SFlorian Hahn return any_of(users(), [](const VPUser *U) { 23669333b977SFlorian Hahn if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U)) 2367c21ccebeSFlorian Hahn return any_of(PredR->users(), [PredR](const VPUser *U) { 2368c21ccebeSFlorian Hahn return !U->usesScalars(PredR); 2369c21ccebeSFlorian Hahn }); 23709333b977SFlorian Hahn return false; 23719333b977SFlorian Hahn }); 23729333b977SFlorian Hahn } 2373df016a95SFlorian Hahn 2374af6ebb70SFlorian Hahn InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, 2375af6ebb70SFlorian Hahn VPCostContext &Ctx) const { 2376af6ebb70SFlorian Hahn Instruction *UI = cast<Instruction>(getUnderlyingValue()); 2377af6ebb70SFlorian Hahn // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan 2378af6ebb70SFlorian Hahn // transform, avoid computing their cost multiple times for now. 2379af6ebb70SFlorian Hahn Ctx.SkipCostComputation.insert(UI); 2380af6ebb70SFlorian Hahn return Ctx.getLegacyCost(UI, VF); 2381af6ebb70SFlorian Hahn } 2382af6ebb70SFlorian Hahn 2383df016a95SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 238403975b7fSFlorian Hahn void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, 238503975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 238603975b7fSFlorian Hahn O << Indent << (IsUniform ? "CLONE " : "REPLICATE "); 238703975b7fSFlorian Hahn 238803975b7fSFlorian Hahn if (!getUnderlyingInstr()->getType()->isVoidTy()) { 238903975b7fSFlorian Hahn printAsOperand(O, SlotTracker); 239003975b7fSFlorian Hahn O << " = "; 239103975b7fSFlorian Hahn } 239203975b7fSFlorian Hahn if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) { 2393299f0ff6SFlorian Hahn O << "call"; 2394299f0ff6SFlorian Hahn printFlags(O); 2395299f0ff6SFlorian Hahn O << "@" << CB->getCalledFunction()->getName() << "("; 239603975b7fSFlorian Hahn interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)), 239703975b7fSFlorian Hahn O, [&O, &SlotTracker](VPValue *Op) { 239803975b7fSFlorian Hahn Op->printAsOperand(O, SlotTracker); 239903975b7fSFlorian Hahn }); 240003975b7fSFlorian Hahn O << ")"; 240103975b7fSFlorian Hahn } else { 2402299f0ff6SFlorian Hahn O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()); 2403299f0ff6SFlorian Hahn printFlags(O); 240403975b7fSFlorian Hahn printOperands(O, SlotTracker); 240503975b7fSFlorian Hahn } 240603975b7fSFlorian Hahn 24079333b977SFlorian Hahn if (shouldPack()) 240803975b7fSFlorian Hahn O << " (S->V)"; 240903975b7fSFlorian Hahn } 2410225e3ec6SFlorian Hahn #endif 241103975b7fSFlorian Hahn 241206c3a7d2SFlorian Hahn Value *VPScalarCastRecipe ::generate(VPTransformState &State) { 2413f4230b43SFlorian Hahn State.setDebugLocFrom(getDebugLoc()); 24140ab539fdSFlorian Hahn assert(vputils::onlyFirstLaneUsed(this) && 24150ab539fdSFlorian Hahn "Codegen only implemented for first lane."); 24160ab539fdSFlorian Hahn switch (Opcode) { 24170ab539fdSFlorian Hahn case Instruction::SExt: 24180ab539fdSFlorian Hahn case Instruction::ZExt: 24190ab539fdSFlorian Hahn case Instruction::Trunc: { 24200ab539fdSFlorian Hahn // Note: SExt/ZExt not used yet. 2421aae7ac66SFlorian Hahn Value *Op = State.get(getOperand(0), VPLane(0)); 24220ab539fdSFlorian Hahn return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy); 24230ab539fdSFlorian Hahn } 24240ab539fdSFlorian Hahn default: 24250ab539fdSFlorian Hahn llvm_unreachable("opcode not implemented yet"); 24260ab539fdSFlorian Hahn } 24270ab539fdSFlorian Hahn } 24280ab539fdSFlorian Hahn 24290ab539fdSFlorian Hahn void VPScalarCastRecipe ::execute(VPTransformState &State) { 2430aae7ac66SFlorian Hahn State.set(this, generate(State), VPLane(0)); 24310ab539fdSFlorian Hahn } 24320ab539fdSFlorian Hahn 24330ab539fdSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 24340ab539fdSFlorian Hahn void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent, 24350ab539fdSFlorian Hahn VPSlotTracker &SlotTracker) const { 24360ab539fdSFlorian Hahn O << Indent << "SCALAR-CAST "; 24370ab539fdSFlorian Hahn printAsOperand(O, SlotTracker); 24380ab539fdSFlorian Hahn O << " = " << Instruction::getOpcodeName(Opcode) << " "; 24390ab539fdSFlorian Hahn printOperands(O, SlotTracker); 24400ab539fdSFlorian Hahn O << " to " << *ResultTy; 24410ab539fdSFlorian Hahn } 24420ab539fdSFlorian Hahn #endif 24430ab539fdSFlorian Hahn 2444225e3ec6SFlorian Hahn void VPBranchOnMaskRecipe::execute(VPTransformState &State) { 2445aae7ac66SFlorian Hahn assert(State.Lane && "Branch on Mask works only on single instance."); 2446225e3ec6SFlorian Hahn 2447225e3ec6SFlorian Hahn 2448225e3ec6SFlorian Hahn Value *ConditionBit = nullptr; 2449225e3ec6SFlorian Hahn VPValue *BlockInMask = getMask(); 2450*713482fcSFlorian Hahn if (BlockInMask) 2451*713482fcSFlorian Hahn ConditionBit = State.get(BlockInMask, *State.Lane); 2452*713482fcSFlorian Hahn else // Block in mask is all-one. 2453225e3ec6SFlorian Hahn ConditionBit = State.Builder.getTrue(); 2454225e3ec6SFlorian Hahn 2455225e3ec6SFlorian Hahn // Replace the temporary unreachable terminator with a new conditional branch, 2456225e3ec6SFlorian Hahn // whose two destinations will be set later when they are created. 2457225e3ec6SFlorian Hahn auto *CurrentTerminator = State.CFG.PrevBB->getTerminator(); 2458225e3ec6SFlorian Hahn assert(isa<UnreachableInst>(CurrentTerminator) && 2459225e3ec6SFlorian Hahn "Expected to replace unreachable terminator with conditional branch."); 2460225e3ec6SFlorian Hahn auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit); 2461225e3ec6SFlorian Hahn CondBr->setSuccessor(0, nullptr); 2462225e3ec6SFlorian Hahn ReplaceInstWithInst(CurrentTerminator, CondBr); 2463225e3ec6SFlorian Hahn } 2464225e3ec6SFlorian Hahn 2465fa3258ecSFlorian Hahn InstructionCost VPBranchOnMaskRecipe::computeCost(ElementCount VF, 2466fa3258ecSFlorian Hahn VPCostContext &Ctx) const { 2467fa3258ecSFlorian Hahn // The legacy cost model doesn't assign costs to branches for individual 2468fa3258ecSFlorian Hahn // replicate regions. Match the current behavior in the VPlan cost model for 2469fa3258ecSFlorian Hahn // now. 2470fa3258ecSFlorian Hahn return 0; 2471fa3258ecSFlorian Hahn } 2472fa3258ecSFlorian Hahn 2473cc0ee179SFlorian Hahn void VPPredInstPHIRecipe::execute(VPTransformState &State) { 24749b496debSFlorian Hahn State.setDebugLocFrom(getDebugLoc()); 2475aae7ac66SFlorian Hahn assert(State.Lane && "Predicated instruction PHI works per instance."); 2476cc0ee179SFlorian Hahn Instruction *ScalarPredInst = 2477aae7ac66SFlorian Hahn cast<Instruction>(State.get(getOperand(0), *State.Lane)); 2478cc0ee179SFlorian Hahn BasicBlock *PredicatedBB = ScalarPredInst->getParent(); 2479cc0ee179SFlorian Hahn BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); 2480cc0ee179SFlorian Hahn assert(PredicatingBB && "Predicated block has no single predecessor."); 2481cc0ee179SFlorian Hahn assert(isa<VPReplicateRecipe>(getOperand(0)) && 2482cc0ee179SFlorian Hahn "operand must be VPReplicateRecipe"); 2483cc0ee179SFlorian Hahn 2484cc0ee179SFlorian Hahn // By current pack/unpack logic we need to generate only a single phi node: if 2485cc0ee179SFlorian Hahn // a vector value for the predicated instruction exists at this point it means 2486cc0ee179SFlorian Hahn // the instruction has vector users only, and a phi for the vector value is 2487cc0ee179SFlorian Hahn // needed. In this case the recipe of the predicated instruction is marked to 2488cc0ee179SFlorian Hahn // also do that packing, thereby "hoisting" the insert-element sequence. 2489cc0ee179SFlorian Hahn // Otherwise, a phi node for the scalar value is needed. 249057f5d8f2SFlorian Hahn if (State.hasVectorValue(getOperand(0))) { 249157f5d8f2SFlorian Hahn Value *VectorValue = State.get(getOperand(0)); 2492cc0ee179SFlorian Hahn InsertElementInst *IEI = cast<InsertElementInst>(VectorValue); 2493cc0ee179SFlorian Hahn PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2); 2494cc0ee179SFlorian Hahn VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector. 2495cc0ee179SFlorian Hahn VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element. 249657f5d8f2SFlorian Hahn if (State.hasVectorValue(this)) 249757f5d8f2SFlorian Hahn State.reset(this, VPhi); 2498cc0ee179SFlorian Hahn else 249957f5d8f2SFlorian Hahn State.set(this, VPhi); 2500cc0ee179SFlorian Hahn // NOTE: Currently we need to update the value of the operand, so the next 2501cc0ee179SFlorian Hahn // predicated iteration inserts its generated value in the correct vector. 250257f5d8f2SFlorian Hahn State.reset(getOperand(0), VPhi); 2503cc0ee179SFlorian Hahn } else { 250468210c7cSFlorian Hahn if (vputils::onlyFirstLaneUsed(this) && !State.Lane->isFirstLane()) 250568210c7cSFlorian Hahn return; 250668210c7cSFlorian Hahn 2507cc0ee179SFlorian Hahn Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType(); 2508cc0ee179SFlorian Hahn PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2); 2509cc0ee179SFlorian Hahn Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()), 2510cc0ee179SFlorian Hahn PredicatingBB); 2511cc0ee179SFlorian Hahn Phi->addIncoming(ScalarPredInst, PredicatedBB); 2512aae7ac66SFlorian Hahn if (State.hasScalarValue(this, *State.Lane)) 2513aae7ac66SFlorian Hahn State.reset(this, Phi, *State.Lane); 2514cc0ee179SFlorian Hahn else 2515aae7ac66SFlorian Hahn State.set(this, Phi, *State.Lane); 2516cc0ee179SFlorian Hahn // NOTE: Currently we need to update the value of the operand, so the next 2517cc0ee179SFlorian Hahn // predicated iteration inserts its generated value in the correct vector. 2518aae7ac66SFlorian Hahn State.reset(getOperand(0), Phi, *State.Lane); 2519cc0ee179SFlorian Hahn } 2520cc0ee179SFlorian Hahn } 2521cc0ee179SFlorian Hahn 2522225e3ec6SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 252303975b7fSFlorian Hahn void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, 252403975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 252503975b7fSFlorian Hahn O << Indent << "PHI-PREDICATED-INSTRUCTION "; 252603975b7fSFlorian Hahn printAsOperand(O, SlotTracker); 252703975b7fSFlorian Hahn O << " = "; 252803975b7fSFlorian Hahn printOperands(O, SlotTracker); 252903975b7fSFlorian Hahn } 253035d3625aSFlorian Hahn #endif 253103975b7fSFlorian Hahn 2532ed220e15SElvis Wang InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF, 2533ed220e15SElvis Wang VPCostContext &Ctx) const { 25349ab5474eSBenjamin Maxwell Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF); 2535ed220e15SElvis Wang const Align Alignment = 2536ed220e15SElvis Wang getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient)); 2537ed220e15SElvis Wang unsigned AS = 2538ed220e15SElvis Wang getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient)); 2539ed220e15SElvis Wang 2540ed220e15SElvis Wang if (!Consecutive) { 2541ed220e15SElvis Wang // TODO: Using the original IR may not be accurate. 2542ed220e15SElvis Wang // Currently, ARM will use the underlying IR to calculate gather/scatter 2543ed220e15SElvis Wang // instruction cost. 2544ed220e15SElvis Wang const Value *Ptr = getLoadStorePointerOperand(&Ingredient); 2545ed220e15SElvis Wang assert(!Reverse && 2546ed220e15SElvis Wang "Inconsecutive memory access should not have the order."); 2547ed220e15SElvis Wang return Ctx.TTI.getAddressComputationCost(Ty) + 2548ed220e15SElvis Wang Ctx.TTI.getGatherScatterOpCost(Ingredient.getOpcode(), Ty, Ptr, 2549edf3a55bSJohn Brawn IsMasked, Alignment, Ctx.CostKind, 2550ed220e15SElvis Wang &Ingredient); 2551ed220e15SElvis Wang } 2552ed220e15SElvis Wang 2553ed220e15SElvis Wang InstructionCost Cost = 0; 2554ed220e15SElvis Wang if (IsMasked) { 2555ed220e15SElvis Wang Cost += Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment, 2556edf3a55bSJohn Brawn AS, Ctx.CostKind); 2557ed220e15SElvis Wang } else { 2558ed220e15SElvis Wang TTI::OperandValueInfo OpInfo = 2559ed220e15SElvis Wang Ctx.TTI.getOperandInfo(Ingredient.getOperand(0)); 2560ed220e15SElvis Wang Cost += Ctx.TTI.getMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment, AS, 2561edf3a55bSJohn Brawn Ctx.CostKind, OpInfo, &Ingredient); 2562ed220e15SElvis Wang } 2563ed220e15SElvis Wang if (!Reverse) 2564ed220e15SElvis Wang return Cost; 2565ed220e15SElvis Wang 2566edf3a55bSJohn Brawn return Cost += 2567edf3a55bSJohn Brawn Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, 2568edf3a55bSJohn Brawn cast<VectorType>(Ty), {}, Ctx.CostKind, 0); 2569ed220e15SElvis Wang } 2570ed220e15SElvis Wang 257135d3625aSFlorian Hahn void VPWidenLoadRecipe::execute(VPTransformState &State) { 257235d3625aSFlorian Hahn auto *LI = cast<LoadInst>(&Ingredient); 257335d3625aSFlorian Hahn 257435d3625aSFlorian Hahn Type *ScalarDataTy = getLoadStoreType(&Ingredient); 257535d3625aSFlorian Hahn auto *DataTy = VectorType::get(ScalarDataTy, State.VF); 257635d3625aSFlorian Hahn const Align Alignment = getLoadStoreAlignment(&Ingredient); 257735d3625aSFlorian Hahn bool CreateGather = !isConsecutive(); 257835d3625aSFlorian Hahn 257935d3625aSFlorian Hahn auto &Builder = State.Builder; 258035d3625aSFlorian Hahn State.setDebugLocFrom(getDebugLoc()); 258135d3625aSFlorian Hahn Value *Mask = nullptr; 258235d3625aSFlorian Hahn if (auto *VPMask = getMask()) { 258335d3625aSFlorian Hahn // Mask reversal is only needed for non-all-one (null) masks, as reverse 258435d3625aSFlorian Hahn // of a null all-one mask is a null mask. 258557f5d8f2SFlorian Hahn Mask = State.get(VPMask); 258635d3625aSFlorian Hahn if (isReverse()) 258735d3625aSFlorian Hahn Mask = Builder.CreateVectorReverse(Mask, "reverse"); 258835d3625aSFlorian Hahn } 258935d3625aSFlorian Hahn 259057f5d8f2SFlorian Hahn Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateGather); 259106c3a7d2SFlorian Hahn Value *NewLI; 259235d3625aSFlorian Hahn if (CreateGather) { 259335d3625aSFlorian Hahn NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr, 259435d3625aSFlorian Hahn "wide.masked.gather"); 259535d3625aSFlorian Hahn } else if (Mask) { 259606c3a7d2SFlorian Hahn NewLI = 259706c3a7d2SFlorian Hahn Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask, 259806c3a7d2SFlorian Hahn PoisonValue::get(DataTy), "wide.masked.load"); 259935d3625aSFlorian Hahn } else { 260035d3625aSFlorian Hahn NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load"); 260135d3625aSFlorian Hahn } 260235d3625aSFlorian Hahn // Add metadata to the load, but setVectorValue to the reverse shuffle. 260335d3625aSFlorian Hahn State.addMetadata(NewLI, LI); 260435d3625aSFlorian Hahn if (Reverse) 260535d3625aSFlorian Hahn NewLI = Builder.CreateVectorReverse(NewLI, "reverse"); 260657f5d8f2SFlorian Hahn State.set(this, NewLI); 260735d3625aSFlorian Hahn } 260835d3625aSFlorian Hahn 260935d3625aSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2610a9bafe91SFlorian Hahn void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent, 261103975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 261203975b7fSFlorian Hahn O << Indent << "WIDEN "; 2613a9bafe91SFlorian Hahn printAsOperand(O, SlotTracker); 2614a9bafe91SFlorian Hahn O << " = load "; 2615a9bafe91SFlorian Hahn printOperands(O, SlotTracker); 261603975b7fSFlorian Hahn } 26171fa6c99aSFlorian Hahn #endif 261803975b7fSFlorian Hahn 26191fa6c99aSFlorian Hahn /// Use all-true mask for reverse rather than actual mask, as it avoids a 26201fa6c99aSFlorian Hahn /// dependence w/o affecting the result. 26211fa6c99aSFlorian Hahn static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand, 26221fa6c99aSFlorian Hahn Value *EVL, const Twine &Name) { 26231fa6c99aSFlorian Hahn VectorType *ValTy = cast<VectorType>(Operand->getType()); 26241fa6c99aSFlorian Hahn Value *AllTrueMask = 26251fa6c99aSFlorian Hahn Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue()); 26261fa6c99aSFlorian Hahn return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse, 26271fa6c99aSFlorian Hahn {Operand, AllTrueMask, EVL}, nullptr, Name); 26281fa6c99aSFlorian Hahn } 26291fa6c99aSFlorian Hahn 26301fa6c99aSFlorian Hahn void VPWidenLoadEVLRecipe::execute(VPTransformState &State) { 26311fa6c99aSFlorian Hahn auto *LI = cast<LoadInst>(&Ingredient); 26321fa6c99aSFlorian Hahn 26331fa6c99aSFlorian Hahn Type *ScalarDataTy = getLoadStoreType(&Ingredient); 26341fa6c99aSFlorian Hahn auto *DataTy = VectorType::get(ScalarDataTy, State.VF); 26351fa6c99aSFlorian Hahn const Align Alignment = getLoadStoreAlignment(&Ingredient); 26361fa6c99aSFlorian Hahn bool CreateGather = !isConsecutive(); 26371fa6c99aSFlorian Hahn 26381fa6c99aSFlorian Hahn auto &Builder = State.Builder; 26391fa6c99aSFlorian Hahn State.setDebugLocFrom(getDebugLoc()); 26401fa6c99aSFlorian Hahn CallInst *NewLI; 2641aae7ac66SFlorian Hahn Value *EVL = State.get(getEVL(), VPLane(0)); 264257f5d8f2SFlorian Hahn Value *Addr = State.get(getAddr(), !CreateGather); 26431fa6c99aSFlorian Hahn Value *Mask = nullptr; 26441fa6c99aSFlorian Hahn if (VPValue *VPMask = getMask()) { 264557f5d8f2SFlorian Hahn Mask = State.get(VPMask); 26461fa6c99aSFlorian Hahn if (isReverse()) 26471fa6c99aSFlorian Hahn Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask"); 26481fa6c99aSFlorian Hahn } else { 26491fa6c99aSFlorian Hahn Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue()); 26501fa6c99aSFlorian Hahn } 26511fa6c99aSFlorian Hahn 26521fa6c99aSFlorian Hahn if (CreateGather) { 26531fa6c99aSFlorian Hahn NewLI = 26541fa6c99aSFlorian Hahn Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL}, 26551fa6c99aSFlorian Hahn nullptr, "wide.masked.gather"); 26561fa6c99aSFlorian Hahn } else { 26571fa6c99aSFlorian Hahn VectorBuilder VBuilder(Builder); 26581fa6c99aSFlorian Hahn VBuilder.setEVL(EVL).setMask(Mask); 26591fa6c99aSFlorian Hahn NewLI = cast<CallInst>(VBuilder.createVectorInstruction( 26601fa6c99aSFlorian Hahn Instruction::Load, DataTy, Addr, "vp.op.load")); 26611fa6c99aSFlorian Hahn } 26621fa6c99aSFlorian Hahn NewLI->addParamAttr( 26631fa6c99aSFlorian Hahn 0, Attribute::getWithAlignment(NewLI->getContext(), Alignment)); 26641fa6c99aSFlorian Hahn State.addMetadata(NewLI, LI); 26651fa6c99aSFlorian Hahn Instruction *Res = NewLI; 26661fa6c99aSFlorian Hahn if (isReverse()) 26671fa6c99aSFlorian Hahn Res = createReverseEVL(Builder, Res, EVL, "vp.reverse"); 266857f5d8f2SFlorian Hahn State.set(this, Res); 26691fa6c99aSFlorian Hahn } 26701fa6c99aSFlorian Hahn 2671a068b974SElvis Wang InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF, 2672a068b974SElvis Wang VPCostContext &Ctx) const { 2673a068b974SElvis Wang if (!Consecutive || IsMasked) 2674a068b974SElvis Wang return VPWidenMemoryRecipe::computeCost(VF, Ctx); 2675a068b974SElvis Wang 2676a068b974SElvis Wang // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost() 2677a068b974SElvis Wang // here because the EVL recipes using EVL to replace the tail mask. But in the 2678a068b974SElvis Wang // legacy model, it will always calculate the cost of mask. 2679a068b974SElvis Wang // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we 2680a068b974SElvis Wang // don't need to compare to the legacy cost model. 26819ab5474eSBenjamin Maxwell Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF); 2682a068b974SElvis Wang const Align Alignment = 2683a068b974SElvis Wang getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient)); 2684a068b974SElvis Wang unsigned AS = 2685a068b974SElvis Wang getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient)); 2686a068b974SElvis Wang InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost( 2687edf3a55bSJohn Brawn Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind); 2688a068b974SElvis Wang if (!Reverse) 2689a068b974SElvis Wang return Cost; 2690a068b974SElvis Wang 2691a068b974SElvis Wang return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, 2692edf3a55bSJohn Brawn cast<VectorType>(Ty), {}, Ctx.CostKind, 2693edf3a55bSJohn Brawn 0); 2694a068b974SElvis Wang } 2695a068b974SElvis Wang 26961fa6c99aSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2697e2a72fa5SFlorian Hahn void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent, 2698e2a72fa5SFlorian Hahn VPSlotTracker &SlotTracker) const { 2699e2a72fa5SFlorian Hahn O << Indent << "WIDEN "; 2700e2a72fa5SFlorian Hahn printAsOperand(O, SlotTracker); 2701e2a72fa5SFlorian Hahn O << " = vp.load "; 2702e2a72fa5SFlorian Hahn printOperands(O, SlotTracker); 2703e2a72fa5SFlorian Hahn } 270412763a06SFlorian Hahn #endif 2705e2a72fa5SFlorian Hahn 270612763a06SFlorian Hahn void VPWidenStoreRecipe::execute(VPTransformState &State) { 270712763a06SFlorian Hahn auto *SI = cast<StoreInst>(&Ingredient); 270812763a06SFlorian Hahn 270912763a06SFlorian Hahn VPValue *StoredVPValue = getStoredValue(); 271012763a06SFlorian Hahn bool CreateScatter = !isConsecutive(); 271112763a06SFlorian Hahn const Align Alignment = getLoadStoreAlignment(&Ingredient); 271212763a06SFlorian Hahn 271312763a06SFlorian Hahn auto &Builder = State.Builder; 271412763a06SFlorian Hahn State.setDebugLocFrom(getDebugLoc()); 271512763a06SFlorian Hahn 271612763a06SFlorian Hahn Value *Mask = nullptr; 271712763a06SFlorian Hahn if (auto *VPMask = getMask()) { 271812763a06SFlorian Hahn // Mask reversal is only needed for non-all-one (null) masks, as reverse 271912763a06SFlorian Hahn // of a null all-one mask is a null mask. 272057f5d8f2SFlorian Hahn Mask = State.get(VPMask); 272112763a06SFlorian Hahn if (isReverse()) 272212763a06SFlorian Hahn Mask = Builder.CreateVectorReverse(Mask, "reverse"); 272312763a06SFlorian Hahn } 272412763a06SFlorian Hahn 272557f5d8f2SFlorian Hahn Value *StoredVal = State.get(StoredVPValue); 272612763a06SFlorian Hahn if (isReverse()) { 272712763a06SFlorian Hahn // If we store to reverse consecutive memory locations, then we need 272812763a06SFlorian Hahn // to reverse the order of elements in the stored value. 272912763a06SFlorian Hahn StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse"); 273012763a06SFlorian Hahn // We don't want to update the value in the map as it might be used in 273112763a06SFlorian Hahn // another expression. So don't call resetVectorValue(StoredVal). 273212763a06SFlorian Hahn } 273357f5d8f2SFlorian Hahn Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter); 273406c3a7d2SFlorian Hahn Instruction *NewSI = nullptr; 273512763a06SFlorian Hahn if (CreateScatter) 273612763a06SFlorian Hahn NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask); 273712763a06SFlorian Hahn else if (Mask) 273812763a06SFlorian Hahn NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask); 273912763a06SFlorian Hahn else 274012763a06SFlorian Hahn NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment); 274112763a06SFlorian Hahn State.addMetadata(NewSI, SI); 274212763a06SFlorian Hahn } 274312763a06SFlorian Hahn 274412763a06SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2745a9bafe91SFlorian Hahn void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent, 2746a9bafe91SFlorian Hahn VPSlotTracker &SlotTracker) const { 2747a9bafe91SFlorian Hahn O << Indent << "WIDEN store "; 274803975b7fSFlorian Hahn printOperands(O, SlotTracker); 274903975b7fSFlorian Hahn } 27501fa6c99aSFlorian Hahn #endif 2751e2a72fa5SFlorian Hahn 27521fa6c99aSFlorian Hahn void VPWidenStoreEVLRecipe::execute(VPTransformState &State) { 27531fa6c99aSFlorian Hahn auto *SI = cast<StoreInst>(&Ingredient); 27541fa6c99aSFlorian Hahn 27551fa6c99aSFlorian Hahn VPValue *StoredValue = getStoredValue(); 27561fa6c99aSFlorian Hahn bool CreateScatter = !isConsecutive(); 27571fa6c99aSFlorian Hahn const Align Alignment = getLoadStoreAlignment(&Ingredient); 27581fa6c99aSFlorian Hahn 27591fa6c99aSFlorian Hahn auto &Builder = State.Builder; 27601fa6c99aSFlorian Hahn State.setDebugLocFrom(getDebugLoc()); 27611fa6c99aSFlorian Hahn 27621fa6c99aSFlorian Hahn CallInst *NewSI = nullptr; 276357f5d8f2SFlorian Hahn Value *StoredVal = State.get(StoredValue); 2764aae7ac66SFlorian Hahn Value *EVL = State.get(getEVL(), VPLane(0)); 27651fa6c99aSFlorian Hahn if (isReverse()) 27661fa6c99aSFlorian Hahn StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse"); 27671fa6c99aSFlorian Hahn Value *Mask = nullptr; 27681fa6c99aSFlorian Hahn if (VPValue *VPMask = getMask()) { 276957f5d8f2SFlorian Hahn Mask = State.get(VPMask); 27701fa6c99aSFlorian Hahn if (isReverse()) 27711fa6c99aSFlorian Hahn Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask"); 27721fa6c99aSFlorian Hahn } else { 27731fa6c99aSFlorian Hahn Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue()); 27741fa6c99aSFlorian Hahn } 277557f5d8f2SFlorian Hahn Value *Addr = State.get(getAddr(), !CreateScatter); 27761fa6c99aSFlorian Hahn if (CreateScatter) { 27771fa6c99aSFlorian Hahn NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()), 27781fa6c99aSFlorian Hahn Intrinsic::vp_scatter, 27791fa6c99aSFlorian Hahn {StoredVal, Addr, Mask, EVL}); 27801fa6c99aSFlorian Hahn } else { 27811fa6c99aSFlorian Hahn VectorBuilder VBuilder(Builder); 27821fa6c99aSFlorian Hahn VBuilder.setEVL(EVL).setMask(Mask); 27831fa6c99aSFlorian Hahn NewSI = cast<CallInst>(VBuilder.createVectorInstruction( 27841fa6c99aSFlorian Hahn Instruction::Store, Type::getVoidTy(EVL->getContext()), 27851fa6c99aSFlorian Hahn {StoredVal, Addr})); 27861fa6c99aSFlorian Hahn } 27871fa6c99aSFlorian Hahn NewSI->addParamAttr( 27881fa6c99aSFlorian Hahn 1, Attribute::getWithAlignment(NewSI->getContext(), Alignment)); 27891fa6c99aSFlorian Hahn State.addMetadata(NewSI, SI); 27901fa6c99aSFlorian Hahn } 27911fa6c99aSFlorian Hahn 2792a068b974SElvis Wang InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF, 2793a068b974SElvis Wang VPCostContext &Ctx) const { 2794a068b974SElvis Wang if (!Consecutive || IsMasked) 2795a068b974SElvis Wang return VPWidenMemoryRecipe::computeCost(VF, Ctx); 2796a068b974SElvis Wang 2797a068b974SElvis Wang // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost() 2798a068b974SElvis Wang // here because the EVL recipes using EVL to replace the tail mask. But in the 2799a068b974SElvis Wang // legacy model, it will always calculate the cost of mask. 2800a068b974SElvis Wang // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we 2801a068b974SElvis Wang // don't need to compare to the legacy cost model. 28029ab5474eSBenjamin Maxwell Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF); 2803a068b974SElvis Wang const Align Alignment = 2804a068b974SElvis Wang getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient)); 2805a068b974SElvis Wang unsigned AS = 2806a068b974SElvis Wang getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient)); 2807a068b974SElvis Wang InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost( 2808edf3a55bSJohn Brawn Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind); 2809a068b974SElvis Wang if (!Reverse) 2810a068b974SElvis Wang return Cost; 2811a068b974SElvis Wang 2812a068b974SElvis Wang return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, 2813edf3a55bSJohn Brawn cast<VectorType>(Ty), {}, Ctx.CostKind, 2814edf3a55bSJohn Brawn 0); 2815a068b974SElvis Wang } 2816a068b974SElvis Wang 28171fa6c99aSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2818e2a72fa5SFlorian Hahn void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent, 2819e2a72fa5SFlorian Hahn VPSlotTracker &SlotTracker) const { 2820e2a72fa5SFlorian Hahn O << Indent << "WIDEN vp.store "; 2821e2a72fa5SFlorian Hahn printOperands(O, SlotTracker); 2822e2a72fa5SFlorian Hahn } 282303975b7fSFlorian Hahn #endif 282403975b7fSFlorian Hahn 2825a23efcc7SFlorian Hahn static Value *createBitOrPointerCast(IRBuilderBase &Builder, Value *V, 2826a23efcc7SFlorian Hahn VectorType *DstVTy, const DataLayout &DL) { 2827a23efcc7SFlorian Hahn // Verify that V is a vector type with same number of elements as DstVTy. 2828a23efcc7SFlorian Hahn auto VF = DstVTy->getElementCount(); 2829a23efcc7SFlorian Hahn auto *SrcVecTy = cast<VectorType>(V->getType()); 2830a23efcc7SFlorian Hahn assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match"); 2831a23efcc7SFlorian Hahn Type *SrcElemTy = SrcVecTy->getElementType(); 2832a23efcc7SFlorian Hahn Type *DstElemTy = DstVTy->getElementType(); 2833a23efcc7SFlorian Hahn assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) && 2834a23efcc7SFlorian Hahn "Vector elements must have same size"); 2835a23efcc7SFlorian Hahn 2836a23efcc7SFlorian Hahn // Do a direct cast if element types are castable. 2837a23efcc7SFlorian Hahn if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) { 2838a23efcc7SFlorian Hahn return Builder.CreateBitOrPointerCast(V, DstVTy); 2839a23efcc7SFlorian Hahn } 2840a23efcc7SFlorian Hahn // V cannot be directly casted to desired vector type. 2841a23efcc7SFlorian Hahn // May happen when V is a floating point vector but DstVTy is a vector of 2842a23efcc7SFlorian Hahn // pointers or vice-versa. Handle this using a two-step bitcast using an 2843a23efcc7SFlorian Hahn // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float. 2844a23efcc7SFlorian Hahn assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) && 2845a23efcc7SFlorian Hahn "Only one type should be a pointer type"); 2846a23efcc7SFlorian Hahn assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) && 2847a23efcc7SFlorian Hahn "Only one type should be a floating point type"); 2848a23efcc7SFlorian Hahn Type *IntTy = 2849a23efcc7SFlorian Hahn IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy)); 2850a23efcc7SFlorian Hahn auto *VecIntTy = VectorType::get(IntTy, VF); 2851a23efcc7SFlorian Hahn Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy); 2852a23efcc7SFlorian Hahn return Builder.CreateBitOrPointerCast(CastVal, DstVTy); 2853a23efcc7SFlorian Hahn } 2854a23efcc7SFlorian Hahn 2855a23efcc7SFlorian Hahn /// Return a vector containing interleaved elements from multiple 2856a23efcc7SFlorian Hahn /// smaller input vectors. 2857a23efcc7SFlorian Hahn static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals, 2858a23efcc7SFlorian Hahn const Twine &Name) { 2859a23efcc7SFlorian Hahn unsigned Factor = Vals.size(); 2860a23efcc7SFlorian Hahn assert(Factor > 1 && "Tried to interleave invalid number of vectors"); 2861a23efcc7SFlorian Hahn 2862a23efcc7SFlorian Hahn VectorType *VecTy = cast<VectorType>(Vals[0]->getType()); 2863a23efcc7SFlorian Hahn #ifndef NDEBUG 2864a23efcc7SFlorian Hahn for (Value *Val : Vals) 2865a23efcc7SFlorian Hahn assert(Val->getType() == VecTy && "Tried to interleave mismatched types"); 2866a23efcc7SFlorian Hahn #endif 2867a23efcc7SFlorian Hahn 2868a23efcc7SFlorian Hahn // Scalable vectors cannot use arbitrary shufflevectors (only splats), so 2869a23efcc7SFlorian Hahn // must use intrinsics to interleave. 2870a23efcc7SFlorian Hahn if (VecTy->isScalableTy()) { 28716c787ff6SFlorian Hahn VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy); 28726c787ff6SFlorian Hahn return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2, 28736c787ff6SFlorian Hahn Vals, 2874a23efcc7SFlorian Hahn /*FMFSource=*/nullptr, Name); 2875a23efcc7SFlorian Hahn } 2876a23efcc7SFlorian Hahn 2877a23efcc7SFlorian Hahn // Fixed length. Start by concatenating all vectors into a wide vector. 2878a23efcc7SFlorian Hahn Value *WideVec = concatenateVectors(Builder, Vals); 2879a23efcc7SFlorian Hahn 2880a23efcc7SFlorian Hahn // Interleave the elements into the wide vector. 2881a23efcc7SFlorian Hahn const unsigned NumElts = VecTy->getElementCount().getFixedValue(); 2882a23efcc7SFlorian Hahn return Builder.CreateShuffleVector( 2883a23efcc7SFlorian Hahn WideVec, createInterleaveMask(NumElts, Factor), Name); 2884a23efcc7SFlorian Hahn } 2885a23efcc7SFlorian Hahn 2886a23efcc7SFlorian Hahn // Try to vectorize the interleave group that \p Instr belongs to. 2887a23efcc7SFlorian Hahn // 2888a23efcc7SFlorian Hahn // E.g. Translate following interleaved load group (factor = 3): 2889a23efcc7SFlorian Hahn // for (i = 0; i < N; i+=3) { 2890a23efcc7SFlorian Hahn // R = Pic[i]; // Member of index 0 2891a23efcc7SFlorian Hahn // G = Pic[i+1]; // Member of index 1 2892a23efcc7SFlorian Hahn // B = Pic[i+2]; // Member of index 2 2893a23efcc7SFlorian Hahn // ... // do something to R, G, B 2894a23efcc7SFlorian Hahn // } 2895a23efcc7SFlorian Hahn // To: 2896a23efcc7SFlorian Hahn // %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B 2897a23efcc7SFlorian Hahn // %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements 2898a23efcc7SFlorian Hahn // %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements 2899a23efcc7SFlorian Hahn // %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements 2900a23efcc7SFlorian Hahn // 2901a23efcc7SFlorian Hahn // Or translate following interleaved store group (factor = 3): 2902a23efcc7SFlorian Hahn // for (i = 0; i < N; i+=3) { 2903a23efcc7SFlorian Hahn // ... do something to R, G, B 2904a23efcc7SFlorian Hahn // Pic[i] = R; // Member of index 0 2905a23efcc7SFlorian Hahn // Pic[i+1] = G; // Member of index 1 2906a23efcc7SFlorian Hahn // Pic[i+2] = B; // Member of index 2 2907a23efcc7SFlorian Hahn // } 2908a23efcc7SFlorian Hahn // To: 2909a23efcc7SFlorian Hahn // %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7> 2910a23efcc7SFlorian Hahn // %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u> 2911a23efcc7SFlorian Hahn // %interleaved.vec = shuffle %R_G.vec, %B_U.vec, 2912a23efcc7SFlorian Hahn // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements 2913a23efcc7SFlorian Hahn // store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B 2914a23efcc7SFlorian Hahn void VPInterleaveRecipe::execute(VPTransformState &State) { 2915aae7ac66SFlorian Hahn assert(!State.Lane && "Interleave group being replicated."); 2916a23efcc7SFlorian Hahn const InterleaveGroup<Instruction> *Group = IG; 2917a23efcc7SFlorian Hahn Instruction *Instr = Group->getInsertPos(); 2918a23efcc7SFlorian Hahn 2919a23efcc7SFlorian Hahn // Prepare for the vector type of the interleaved load/store. 2920a23efcc7SFlorian Hahn Type *ScalarTy = getLoadStoreType(Instr); 2921a23efcc7SFlorian Hahn unsigned InterleaveFactor = Group->getFactor(); 2922a23efcc7SFlorian Hahn auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor); 2923a23efcc7SFlorian Hahn 2924a23efcc7SFlorian Hahn // TODO: extend the masked interleaved-group support to reversed access. 2925a23efcc7SFlorian Hahn VPValue *BlockInMask = getMask(); 2926a23efcc7SFlorian Hahn assert((!BlockInMask || !Group->isReverse()) && 2927a23efcc7SFlorian Hahn "Reversed masked interleave-group not supported."); 2928a23efcc7SFlorian Hahn 29293ec6f805SFlorian Hahn VPValue *Addr = getAddr(); 29303ec6f805SFlorian Hahn Value *ResAddr = State.get(Addr, VPLane(0)); 29313ec6f805SFlorian Hahn if (auto *I = dyn_cast<Instruction>(ResAddr)) 29323ec6f805SFlorian Hahn State.setDebugLocFrom(I->getDebugLoc()); 29333ec6f805SFlorian Hahn 2934a23efcc7SFlorian Hahn // If the group is reverse, adjust the index to refer to the last vector lane 2935a23efcc7SFlorian Hahn // instead of the first. We adjust the index from the first vector lane, 2936a23efcc7SFlorian Hahn // rather than directly getting the pointer for lane VF - 1, because the 29373fbf6f8bSFlorian Hahn // pointer operand of the interleaved access is supposed to be uniform. 2938a23efcc7SFlorian Hahn if (Group->isReverse()) { 2939a23efcc7SFlorian Hahn Value *RuntimeVF = 2940a23efcc7SFlorian Hahn getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF); 29413ec6f805SFlorian Hahn Value *Index = 29423ec6f805SFlorian Hahn State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1)); 29437f746518SFlorian Hahn Index = State.Builder.CreateMul(Index, 2944a23efcc7SFlorian Hahn State.Builder.getInt32(Group->getFactor())); 29457f746518SFlorian Hahn Index = State.Builder.CreateNeg(Index); 2946a23efcc7SFlorian Hahn 2947a23efcc7SFlorian Hahn bool InBounds = false; 29483ec6f805SFlorian Hahn if (auto *Gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts())) 29493ec6f805SFlorian Hahn InBounds = Gep->isInBounds(); 29507f746518SFlorian Hahn ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds); 29513ec6f805SFlorian Hahn } 2952a23efcc7SFlorian Hahn 2953a23efcc7SFlorian Hahn State.setDebugLocFrom(Instr->getDebugLoc()); 2954a23efcc7SFlorian Hahn Value *PoisonVec = PoisonValue::get(VecTy); 2955a23efcc7SFlorian Hahn 295606c3a7d2SFlorian Hahn auto CreateGroupMask = [&BlockInMask, &State, 295706c3a7d2SFlorian Hahn &InterleaveFactor](Value *MaskForGaps) -> Value * { 2958a23efcc7SFlorian Hahn if (State.VF.isScalable()) { 2959a23efcc7SFlorian Hahn assert(!MaskForGaps && "Interleaved groups with gaps are not supported."); 29606c787ff6SFlorian Hahn assert(InterleaveFactor == 2 && 2961a23efcc7SFlorian Hahn "Unsupported deinterleave factor for scalable vectors"); 296257f5d8f2SFlorian Hahn auto *ResBlockInMask = State.get(BlockInMask); 29636c787ff6SFlorian Hahn SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask}; 29646c787ff6SFlorian Hahn auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(), 29656c787ff6SFlorian Hahn State.VF.getKnownMinValue() * 2, true); 29666c787ff6SFlorian Hahn return State.Builder.CreateIntrinsic( 29676c787ff6SFlorian Hahn MaskTy, Intrinsic::vector_interleave2, Ops, 29686c787ff6SFlorian Hahn /*FMFSource=*/nullptr, "interleaved.mask"); 2969a23efcc7SFlorian Hahn } 2970a23efcc7SFlorian Hahn 2971a23efcc7SFlorian Hahn if (!BlockInMask) 2972a23efcc7SFlorian Hahn return MaskForGaps; 2973a23efcc7SFlorian Hahn 297457f5d8f2SFlorian Hahn Value *ResBlockInMask = State.get(BlockInMask); 2975a23efcc7SFlorian Hahn Value *ShuffledMask = State.Builder.CreateShuffleVector( 297606c3a7d2SFlorian Hahn ResBlockInMask, 2977a23efcc7SFlorian Hahn createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()), 2978a23efcc7SFlorian Hahn "interleaved.mask"); 2979a23efcc7SFlorian Hahn return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And, 2980a23efcc7SFlorian Hahn ShuffledMask, MaskForGaps) 2981a23efcc7SFlorian Hahn : ShuffledMask; 2982a23efcc7SFlorian Hahn }; 2983a23efcc7SFlorian Hahn 2984a23efcc7SFlorian Hahn const DataLayout &DL = Instr->getDataLayout(); 2985a23efcc7SFlorian Hahn // Vectorize the interleaved load group. 2986a23efcc7SFlorian Hahn if (isa<LoadInst>(Instr)) { 2987a23efcc7SFlorian Hahn Value *MaskForGaps = nullptr; 2988a23efcc7SFlorian Hahn if (NeedsMaskForGaps) { 2989a23efcc7SFlorian Hahn MaskForGaps = createBitMaskForGaps(State.Builder, 2990a23efcc7SFlorian Hahn State.VF.getKnownMinValue(), *Group); 2991a23efcc7SFlorian Hahn assert(MaskForGaps && "Mask for Gaps is required but it is null"); 2992a23efcc7SFlorian Hahn } 2993a23efcc7SFlorian Hahn 2994a23efcc7SFlorian Hahn Instruction *NewLoad; 2995a23efcc7SFlorian Hahn if (BlockInMask || MaskForGaps) { 299606c3a7d2SFlorian Hahn Value *GroupMask = CreateGroupMask(MaskForGaps); 299706c3a7d2SFlorian Hahn NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr, 2998a23efcc7SFlorian Hahn Group->getAlign(), GroupMask, 2999a23efcc7SFlorian Hahn PoisonVec, "wide.masked.vec"); 3000a23efcc7SFlorian Hahn } else 300106c3a7d2SFlorian Hahn NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr, 300206c3a7d2SFlorian Hahn Group->getAlign(), "wide.vec"); 3003a23efcc7SFlorian Hahn Group->addMetadata(NewLoad); 3004a23efcc7SFlorian Hahn 3005a23efcc7SFlorian Hahn ArrayRef<VPValue *> VPDefs = definedValues(); 3006a23efcc7SFlorian Hahn const DataLayout &DL = State.CFG.PrevBB->getDataLayout(); 3007a23efcc7SFlorian Hahn if (VecTy->isScalableTy()) { 30086c787ff6SFlorian Hahn assert(InterleaveFactor == 2 && 3009a23efcc7SFlorian Hahn "Unsupported deinterleave factor for scalable vectors"); 3010a23efcc7SFlorian Hahn 3011a23efcc7SFlorian Hahn // Scalable vectors cannot use arbitrary shufflevectors (only splats), 3012a23efcc7SFlorian Hahn // so must use intrinsics to deinterleave. 30136c787ff6SFlorian Hahn Value *DI = State.Builder.CreateIntrinsic( 30146c787ff6SFlorian Hahn Intrinsic::vector_deinterleave2, VecTy, NewLoad, 3015a23efcc7SFlorian Hahn /*FMFSource=*/nullptr, "strided.vec"); 30166c787ff6SFlorian Hahn unsigned J = 0; 30176c787ff6SFlorian Hahn for (unsigned I = 0; I < InterleaveFactor; ++I) { 3018a23efcc7SFlorian Hahn Instruction *Member = Group->getMember(I); 30196c787ff6SFlorian Hahn 30206c787ff6SFlorian Hahn if (!Member) 3021a23efcc7SFlorian Hahn continue; 30226c787ff6SFlorian Hahn 30236c787ff6SFlorian Hahn Value *StridedVec = State.Builder.CreateExtractValue(DI, I); 3024a23efcc7SFlorian Hahn // If this member has different type, cast the result type. 3025a23efcc7SFlorian Hahn if (Member->getType() != ScalarTy) { 3026a23efcc7SFlorian Hahn VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF); 3027a23efcc7SFlorian Hahn StridedVec = 3028a23efcc7SFlorian Hahn createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL); 3029a23efcc7SFlorian Hahn } 3030a23efcc7SFlorian Hahn 3031a23efcc7SFlorian Hahn if (Group->isReverse()) 303206c3a7d2SFlorian Hahn StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse"); 3033a23efcc7SFlorian Hahn 303457f5d8f2SFlorian Hahn State.set(VPDefs[J], StridedVec); 3035a23efcc7SFlorian Hahn ++J; 3036a23efcc7SFlorian Hahn } 3037a23efcc7SFlorian Hahn 3038a23efcc7SFlorian Hahn return; 3039a23efcc7SFlorian Hahn } 3040a23efcc7SFlorian Hahn 3041a23efcc7SFlorian Hahn // For each member in the group, shuffle out the appropriate data from the 3042a23efcc7SFlorian Hahn // wide loads. 3043a23efcc7SFlorian Hahn unsigned J = 0; 3044a23efcc7SFlorian Hahn for (unsigned I = 0; I < InterleaveFactor; ++I) { 3045a23efcc7SFlorian Hahn Instruction *Member = Group->getMember(I); 3046a23efcc7SFlorian Hahn 3047a23efcc7SFlorian Hahn // Skip the gaps in the group. 3048a23efcc7SFlorian Hahn if (!Member) 3049a23efcc7SFlorian Hahn continue; 3050a23efcc7SFlorian Hahn 3051a23efcc7SFlorian Hahn auto StrideMask = 3052a23efcc7SFlorian Hahn createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue()); 305306c3a7d2SFlorian Hahn Value *StridedVec = 305406c3a7d2SFlorian Hahn State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec"); 3055a23efcc7SFlorian Hahn 3056a23efcc7SFlorian Hahn // If this member has different type, cast the result type. 3057a23efcc7SFlorian Hahn if (Member->getType() != ScalarTy) { 3058a23efcc7SFlorian Hahn assert(!State.VF.isScalable() && "VF is assumed to be non scalable."); 3059a23efcc7SFlorian Hahn VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF); 3060a23efcc7SFlorian Hahn StridedVec = 3061a23efcc7SFlorian Hahn createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL); 3062a23efcc7SFlorian Hahn } 3063a23efcc7SFlorian Hahn 3064a23efcc7SFlorian Hahn if (Group->isReverse()) 3065a23efcc7SFlorian Hahn StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse"); 3066a23efcc7SFlorian Hahn 306757f5d8f2SFlorian Hahn State.set(VPDefs[J], StridedVec); 3068a23efcc7SFlorian Hahn ++J; 3069a23efcc7SFlorian Hahn } 3070a23efcc7SFlorian Hahn return; 3071a23efcc7SFlorian Hahn } 3072a23efcc7SFlorian Hahn 3073a23efcc7SFlorian Hahn // The sub vector type for current instruction. 3074a23efcc7SFlorian Hahn auto *SubVT = VectorType::get(ScalarTy, State.VF); 3075a23efcc7SFlorian Hahn 3076a23efcc7SFlorian Hahn // Vectorize the interleaved store group. 3077a23efcc7SFlorian Hahn Value *MaskForGaps = 3078a23efcc7SFlorian Hahn createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group); 3079a23efcc7SFlorian Hahn assert((!MaskForGaps || !State.VF.isScalable()) && 3080a23efcc7SFlorian Hahn "masking gaps for scalable vectors is not yet supported."); 3081a23efcc7SFlorian Hahn ArrayRef<VPValue *> StoredValues = getStoredValues(); 3082a23efcc7SFlorian Hahn // Collect the stored vector from each member. 3083a23efcc7SFlorian Hahn SmallVector<Value *, 4> StoredVecs; 3084a23efcc7SFlorian Hahn unsigned StoredIdx = 0; 3085a23efcc7SFlorian Hahn for (unsigned i = 0; i < InterleaveFactor; i++) { 3086a23efcc7SFlorian Hahn assert((Group->getMember(i) || MaskForGaps) && 3087a23efcc7SFlorian Hahn "Fail to get a member from an interleaved store group"); 3088a23efcc7SFlorian Hahn Instruction *Member = Group->getMember(i); 3089a23efcc7SFlorian Hahn 3090a23efcc7SFlorian Hahn // Skip the gaps in the group. 3091a23efcc7SFlorian Hahn if (!Member) { 3092a23efcc7SFlorian Hahn Value *Undef = PoisonValue::get(SubVT); 3093a23efcc7SFlorian Hahn StoredVecs.push_back(Undef); 3094a23efcc7SFlorian Hahn continue; 3095a23efcc7SFlorian Hahn } 3096a23efcc7SFlorian Hahn 309757f5d8f2SFlorian Hahn Value *StoredVec = State.get(StoredValues[StoredIdx]); 3098a23efcc7SFlorian Hahn ++StoredIdx; 3099a23efcc7SFlorian Hahn 3100a23efcc7SFlorian Hahn if (Group->isReverse()) 3101a23efcc7SFlorian Hahn StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse"); 3102a23efcc7SFlorian Hahn 3103a23efcc7SFlorian Hahn // If this member has different type, cast it to a unified type. 3104a23efcc7SFlorian Hahn 3105a23efcc7SFlorian Hahn if (StoredVec->getType() != SubVT) 3106a23efcc7SFlorian Hahn StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL); 3107a23efcc7SFlorian Hahn 3108a23efcc7SFlorian Hahn StoredVecs.push_back(StoredVec); 3109a23efcc7SFlorian Hahn } 3110a23efcc7SFlorian Hahn 3111a23efcc7SFlorian Hahn // Interleave all the smaller vectors into one wider vector. 311206c3a7d2SFlorian Hahn Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec"); 3113a23efcc7SFlorian Hahn Instruction *NewStoreInstr; 3114a23efcc7SFlorian Hahn if (BlockInMask || MaskForGaps) { 311506c3a7d2SFlorian Hahn Value *GroupMask = CreateGroupMask(MaskForGaps); 3116a23efcc7SFlorian Hahn NewStoreInstr = State.Builder.CreateMaskedStore( 311706c3a7d2SFlorian Hahn IVec, ResAddr, Group->getAlign(), GroupMask); 3118a23efcc7SFlorian Hahn } else 311906c3a7d2SFlorian Hahn NewStoreInstr = 312006c3a7d2SFlorian Hahn State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign()); 3121a23efcc7SFlorian Hahn 3122a23efcc7SFlorian Hahn Group->addMetadata(NewStoreInstr); 3123a23efcc7SFlorian Hahn } 3124a23efcc7SFlorian Hahn 3125a23efcc7SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3126a23efcc7SFlorian Hahn void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent, 3127a23efcc7SFlorian Hahn VPSlotTracker &SlotTracker) const { 3128a23efcc7SFlorian Hahn O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at "; 3129a23efcc7SFlorian Hahn IG->getInsertPos()->printAsOperand(O, false); 3130a23efcc7SFlorian Hahn O << ", "; 3131a23efcc7SFlorian Hahn getAddr()->printAsOperand(O, SlotTracker); 3132a23efcc7SFlorian Hahn VPValue *Mask = getMask(); 3133a23efcc7SFlorian Hahn if (Mask) { 3134a23efcc7SFlorian Hahn O << ", "; 3135a23efcc7SFlorian Hahn Mask->printAsOperand(O, SlotTracker); 3136a23efcc7SFlorian Hahn } 3137a23efcc7SFlorian Hahn 3138a23efcc7SFlorian Hahn unsigned OpIdx = 0; 3139a23efcc7SFlorian Hahn for (unsigned i = 0; i < IG->getFactor(); ++i) { 3140a23efcc7SFlorian Hahn if (!IG->getMember(i)) 3141a23efcc7SFlorian Hahn continue; 3142a23efcc7SFlorian Hahn if (getNumStoreOperands() > 0) { 3143a23efcc7SFlorian Hahn O << "\n" << Indent << " store "; 3144a23efcc7SFlorian Hahn getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker); 3145a23efcc7SFlorian Hahn O << " to index " << i; 3146a23efcc7SFlorian Hahn } else { 3147a23efcc7SFlorian Hahn O << "\n" << Indent << " "; 3148a23efcc7SFlorian Hahn getVPValue(OpIdx)->printAsOperand(O, SlotTracker); 3149a23efcc7SFlorian Hahn O << " = load from index " << i; 3150a23efcc7SFlorian Hahn } 3151a23efcc7SFlorian Hahn ++OpIdx; 3152a23efcc7SFlorian Hahn } 3153a23efcc7SFlorian Hahn } 3154a23efcc7SFlorian Hahn #endif 3155a23efcc7SFlorian Hahn 3156fa3258ecSFlorian Hahn InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF, 3157fa3258ecSFlorian Hahn VPCostContext &Ctx) const { 31582a6b09e0SFlorian Hahn Instruction *InsertPos = getInsertPos(); 31592a6b09e0SFlorian Hahn // Find the VPValue index of the interleave group. We need to skip gaps. 31602a6b09e0SFlorian Hahn unsigned InsertPosIdx = 0; 31612a6b09e0SFlorian Hahn for (unsigned Idx = 0; IG->getFactor(); ++Idx) 31622a6b09e0SFlorian Hahn if (auto *Member = IG->getMember(Idx)) { 31632a6b09e0SFlorian Hahn if (Member == InsertPos) 31642a6b09e0SFlorian Hahn break; 31652a6b09e0SFlorian Hahn InsertPosIdx++; 31662a6b09e0SFlorian Hahn } 31672a46e5d0SFlorian Hahn Type *ValTy = Ctx.Types.inferScalarType( 31682a6b09e0SFlorian Hahn getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx) 31692a6b09e0SFlorian Hahn : getStoredValues()[InsertPosIdx]); 31709ab5474eSBenjamin Maxwell auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF)); 31712a6b09e0SFlorian Hahn unsigned AS = getLoadStoreAddressSpace(InsertPos); 31722a46e5d0SFlorian Hahn 31732a46e5d0SFlorian Hahn unsigned InterleaveFactor = IG->getFactor(); 31742a46e5d0SFlorian Hahn auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor); 31752a46e5d0SFlorian Hahn 31762a46e5d0SFlorian Hahn // Holds the indices of existing members in the interleaved group. 31772a46e5d0SFlorian Hahn SmallVector<unsigned, 4> Indices; 31782a46e5d0SFlorian Hahn for (unsigned IF = 0; IF < InterleaveFactor; IF++) 31792a46e5d0SFlorian Hahn if (IG->getMember(IF)) 31802a46e5d0SFlorian Hahn Indices.push_back(IF); 31812a46e5d0SFlorian Hahn 31822a46e5d0SFlorian Hahn // Calculate the cost of the whole interleaved group. 31832a46e5d0SFlorian Hahn InstructionCost Cost = Ctx.TTI.getInterleavedMemoryOpCost( 31842a6b09e0SFlorian Hahn InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices, 3185edf3a55bSJohn Brawn IG->getAlign(), AS, Ctx.CostKind, getMask(), NeedsMaskForGaps); 31862a46e5d0SFlorian Hahn 31872a46e5d0SFlorian Hahn if (!IG->isReverse()) 31882a46e5d0SFlorian Hahn return Cost; 31892a46e5d0SFlorian Hahn 31902a46e5d0SFlorian Hahn return Cost + IG->getNumMembers() * 31912a46e5d0SFlorian Hahn Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, 3192edf3a55bSJohn Brawn VectorTy, std::nullopt, Ctx.CostKind, 3193edf3a55bSJohn Brawn 0); 3194fa3258ecSFlorian Hahn } 3195fa3258ecSFlorian Hahn 319603975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 319703975b7fSFlorian Hahn void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, 319803975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 319903975b7fSFlorian Hahn O << Indent << "EMIT "; 320003975b7fSFlorian Hahn printAsOperand(O, SlotTracker); 320103975b7fSFlorian Hahn O << " = CANONICAL-INDUCTION "; 3202f7a8a78cSFlorian Hahn printOperands(O, SlotTracker); 320303975b7fSFlorian Hahn } 320403975b7fSFlorian Hahn #endif 320503975b7fSFlorian Hahn 32062906f362SFlorian Hahn bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) { 32072c692d89SFlorian Hahn return IsScalarAfterVectorization && 32082906f362SFlorian Hahn (!IsScalable || vputils::onlyFirstLaneUsed(this)); 320903975b7fSFlorian Hahn } 321003975b7fSFlorian Hahn 3211241349ffSFlorian Hahn void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { 321295e509a9SFlorian Hahn assert(getInductionDescriptor().getKind() == 321395e509a9SFlorian Hahn InductionDescriptor::IK_PtrInduction && 3214241349ffSFlorian Hahn "Not a pointer induction according to InductionDescriptor!"); 3215241349ffSFlorian Hahn assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() && 3216241349ffSFlorian Hahn "Unexpected type."); 3217241349ffSFlorian Hahn assert(!onlyScalarsGenerated(State.VF.isScalable()) && 3218241349ffSFlorian Hahn "Recipe should have been replaced"); 3219241349ffSFlorian Hahn 32208ec40675SFlorian Hahn unsigned CurrentPart = getUnrollPart(*this); 3221241349ffSFlorian Hahn 3222241349ffSFlorian Hahn // Build a pointer phi 3223241349ffSFlorian Hahn Value *ScalarStartValue = getStartValue()->getLiveInIRValue(); 3224241349ffSFlorian Hahn Type *ScStValueType = ScalarStartValue->getType(); 3225241349ffSFlorian Hahn 3226241349ffSFlorian Hahn BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 32278ec40675SFlorian Hahn PHINode *NewPointerPhi = nullptr; 32288ec40675SFlorian Hahn if (CurrentPart == 0) { 3229a7fda0e1SFlorian Hahn auto *IVR = cast<VPHeaderPHIRecipe>(&getParent() 3230a7fda0e1SFlorian Hahn ->getPlan() 3231a7fda0e1SFlorian Hahn ->getVectorLoopRegion() 3232a7fda0e1SFlorian Hahn ->getEntryBasicBlock() 3233a7fda0e1SFlorian Hahn ->front()); 3234a7fda0e1SFlorian Hahn PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, /*IsScalar*/ true)); 32358ec40675SFlorian Hahn NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi", 32368ec40675SFlorian Hahn CanonicalIV->getIterator()); 3237241349ffSFlorian Hahn NewPointerPhi->addIncoming(ScalarStartValue, VectorPH); 32382067e604SFlorian Hahn NewPointerPhi->setDebugLoc(getDebugLoc()); 32398ec40675SFlorian Hahn } else { 32408ec40675SFlorian Hahn // The recipe has been unrolled. In that case, fetch the single pointer phi 32418ec40675SFlorian Hahn // shared among all unrolled parts of the recipe. 32428ec40675SFlorian Hahn auto *GEP = 324357f5d8f2SFlorian Hahn cast<GetElementPtrInst>(State.get(getFirstUnrolledPartOperand())); 32448ec40675SFlorian Hahn NewPointerPhi = cast<PHINode>(GEP->getPointerOperand()); 32458ec40675SFlorian Hahn } 3246241349ffSFlorian Hahn 3247241349ffSFlorian Hahn // A pointer induction, performed by using a gep 3248241349ffSFlorian Hahn BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint(); 324995e509a9SFlorian Hahn Value *ScalarStepValue = State.get(getStepValue(), VPLane(0)); 325095e509a9SFlorian Hahn Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue()); 3251241349ffSFlorian Hahn Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF); 3252241349ffSFlorian Hahn // Add induction update using an incorrect block temporarily. The phi node 3253241349ffSFlorian Hahn // will be fixed after VPlan execution. Note that at this point the latch 3254241349ffSFlorian Hahn // block cannot be used, as it does not exist yet. 3255241349ffSFlorian Hahn // TODO: Model increment value in VPlan, by turning the recipe into a 3256241349ffSFlorian Hahn // multi-def and a subclass of VPHeaderPHIRecipe. 32578ec40675SFlorian Hahn if (CurrentPart == 0) { 32588ec40675SFlorian Hahn // The recipe represents the first part of the pointer induction. Create the 32598ec40675SFlorian Hahn // GEP to increment the phi across all unrolled parts. 32608ec40675SFlorian Hahn unsigned UF = CurrentPart == 0 ? getParent()->getPlan()->getUF() : 1; 32618ec40675SFlorian Hahn Value *NumUnrolledElems = 32628ec40675SFlorian Hahn State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, UF)); 32638ec40675SFlorian Hahn 32648ec40675SFlorian Hahn Value *InductionGEP = GetElementPtrInst::Create( 32658ec40675SFlorian Hahn State.Builder.getInt8Ty(), NewPointerPhi, 32668ec40675SFlorian Hahn State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind", 32678ec40675SFlorian Hahn InductionLoc); 32688ec40675SFlorian Hahn 3269241349ffSFlorian Hahn NewPointerPhi->addIncoming(InductionGEP, VectorPH); 32708ec40675SFlorian Hahn } 3271241349ffSFlorian Hahn 327206c3a7d2SFlorian Hahn // Create actual address geps that use the pointer phi as base and a 327306c3a7d2SFlorian Hahn // vectorized version of the step value (<step*0, ..., step*N>) as offset. 3274241349ffSFlorian Hahn Type *VecPhiType = VectorType::get(PhiType, State.VF); 32758ec40675SFlorian Hahn Value *StartOffsetScalar = State.Builder.CreateMul( 32768ec40675SFlorian Hahn RuntimeVF, ConstantInt::get(PhiType, CurrentPart)); 3277241349ffSFlorian Hahn Value *StartOffset = 3278241349ffSFlorian Hahn State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar); 3279241349ffSFlorian Hahn // Create a vector of consecutive numbers from zero to VF. 3280241349ffSFlorian Hahn StartOffset = State.Builder.CreateAdd( 3281241349ffSFlorian Hahn StartOffset, State.Builder.CreateStepVector(VecPhiType)); 3282241349ffSFlorian Hahn 3283aae7ac66SFlorian Hahn assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) && 3284241349ffSFlorian Hahn "scalar step must be the same across all parts"); 3285241349ffSFlorian Hahn Value *GEP = State.Builder.CreateGEP( 3286241349ffSFlorian Hahn State.Builder.getInt8Ty(), NewPointerPhi, 3287ffcff2f4SShih-Po Hung State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat( 3288ffcff2f4SShih-Po Hung State.VF, ScalarStepValue)), 3289ffcff2f4SShih-Po Hung "vector.gep"); 329057f5d8f2SFlorian Hahn State.set(this, GEP); 3291241349ffSFlorian Hahn } 3292241349ffSFlorian Hahn 329303975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 329403975b7fSFlorian Hahn void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, 329503975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 32968ec40675SFlorian Hahn assert((getNumOperands() == 2 || getNumOperands() == 4) && 32978ec40675SFlorian Hahn "unexpected number of operands"); 329803975b7fSFlorian Hahn O << Indent << "EMIT "; 329903975b7fSFlorian Hahn printAsOperand(O, SlotTracker); 330003975b7fSFlorian Hahn O << " = WIDEN-POINTER-INDUCTION "; 330103975b7fSFlorian Hahn getStartValue()->printAsOperand(O, SlotTracker); 3302e64650d7SFlorian Hahn O << ", "; 330395e509a9SFlorian Hahn getStepValue()->printAsOperand(O, SlotTracker); 33048ec40675SFlorian Hahn if (getNumOperands() == 4) { 33058ec40675SFlorian Hahn O << ", "; 33068ec40675SFlorian Hahn getOperand(2)->printAsOperand(O, SlotTracker); 33078ec40675SFlorian Hahn O << ", "; 33088ec40675SFlorian Hahn getOperand(3)->printAsOperand(O, SlotTracker); 33098ec40675SFlorian Hahn } 331003975b7fSFlorian Hahn } 331103975b7fSFlorian Hahn #endif 331203975b7fSFlorian Hahn 331303975b7fSFlorian Hahn void VPExpandSCEVRecipe::execute(VPTransformState &State) { 3314aae7ac66SFlorian Hahn assert(!State.Lane && "cannot be used in per-lane"); 33156c8f41d3SFlorian Hahn if (State.ExpandedSCEVs.contains(Expr)) { 33166c8f41d3SFlorian Hahn // SCEV Expr has already been expanded, result must already be set. At the 33176c8f41d3SFlorian Hahn // moment we have to execute the entry block twice (once before skeleton 33186c8f41d3SFlorian Hahn // creation to get expanded SCEVs used by the skeleton and once during 33196c8f41d3SFlorian Hahn // regular VPlan execution). 33206c8f41d3SFlorian Hahn State.Builder.SetInsertPoint(State.CFG.VPBB2IRBB[getParent()]); 33216c8f41d3SFlorian Hahn assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] && 33226c8f41d3SFlorian Hahn "Results must match"); 33236c8f41d3SFlorian Hahn return; 33246c8f41d3SFlorian Hahn } 33256c8f41d3SFlorian Hahn 33262d209d96SNikita Popov const DataLayout &DL = State.CFG.PrevBB->getDataLayout(); 332703975b7fSFlorian Hahn SCEVExpander Exp(SE, DL, "induction"); 332803975b7fSFlorian Hahn 332903975b7fSFlorian Hahn Value *Res = Exp.expandCodeFor(Expr, Expr->getType(), 333003975b7fSFlorian Hahn &*State.Builder.GetInsertPoint()); 3331236a0e82SFlorian Hahn State.ExpandedSCEVs[Expr] = Res; 3332aae7ac66SFlorian Hahn State.set(this, Res, VPLane(0)); 333303975b7fSFlorian Hahn } 333403975b7fSFlorian Hahn 333503975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 333603975b7fSFlorian Hahn void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent, 333703975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 333803975b7fSFlorian Hahn O << Indent << "EMIT "; 33393829fd75SFlorian Hahn printAsOperand(O, SlotTracker); 334003975b7fSFlorian Hahn O << " = EXPAND SCEV " << *Expr; 334103975b7fSFlorian Hahn } 334203975b7fSFlorian Hahn #endif 334303975b7fSFlorian Hahn 334403975b7fSFlorian Hahn void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) { 334557f5d8f2SFlorian Hahn Value *CanonicalIV = State.get(getOperand(0), /*IsScalar*/ true); 334603975b7fSFlorian Hahn Type *STy = CanonicalIV->getType(); 334703975b7fSFlorian Hahn IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); 334803975b7fSFlorian Hahn ElementCount VF = State.VF; 334903975b7fSFlorian Hahn Value *VStart = VF.isScalar() 335003975b7fSFlorian Hahn ? CanonicalIV 335103975b7fSFlorian Hahn : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast"); 33528ec40675SFlorian Hahn Value *VStep = createStepForVF(Builder, STy, VF, getUnrollPart(*this)); 335303975b7fSFlorian Hahn if (VF.isVector()) { 335403975b7fSFlorian Hahn VStep = Builder.CreateVectorSplat(VF, VStep); 335503975b7fSFlorian Hahn VStep = 335603975b7fSFlorian Hahn Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType())); 335703975b7fSFlorian Hahn } 335803975b7fSFlorian Hahn Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv"); 335957f5d8f2SFlorian Hahn State.set(this, CanonicalVectorIV); 336003975b7fSFlorian Hahn } 336103975b7fSFlorian Hahn 336203975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 336303975b7fSFlorian Hahn void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent, 336403975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 336503975b7fSFlorian Hahn O << Indent << "EMIT "; 336603975b7fSFlorian Hahn printAsOperand(O, SlotTracker); 336703975b7fSFlorian Hahn O << " = WIDEN-CANONICAL-INDUCTION "; 336803975b7fSFlorian Hahn printOperands(O, SlotTracker); 336903975b7fSFlorian Hahn } 337003975b7fSFlorian Hahn #endif 337103975b7fSFlorian Hahn 337203975b7fSFlorian Hahn void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) { 337303975b7fSFlorian Hahn auto &Builder = State.Builder; 337403975b7fSFlorian Hahn // Create a vector from the initial value. 337503975b7fSFlorian Hahn auto *VectorInit = getStartValue()->getLiveInIRValue(); 337603975b7fSFlorian Hahn 337703975b7fSFlorian Hahn Type *VecTy = State.VF.isScalar() 337803975b7fSFlorian Hahn ? VectorInit->getType() 337903975b7fSFlorian Hahn : VectorType::get(VectorInit->getType(), State.VF); 338003975b7fSFlorian Hahn 338103975b7fSFlorian Hahn BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 338203975b7fSFlorian Hahn if (State.VF.isVector()) { 338303975b7fSFlorian Hahn auto *IdxTy = Builder.getInt32Ty(); 338403975b7fSFlorian Hahn auto *One = ConstantInt::get(IdxTy, 1); 338503975b7fSFlorian Hahn IRBuilder<>::InsertPointGuard Guard(Builder); 338603975b7fSFlorian Hahn Builder.SetInsertPoint(VectorPH->getTerminator()); 338703975b7fSFlorian Hahn auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF); 338803975b7fSFlorian Hahn auto *LastIdx = Builder.CreateSub(RuntimeVF, One); 338903975b7fSFlorian Hahn VectorInit = Builder.CreateInsertElement( 339003975b7fSFlorian Hahn PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init"); 339103975b7fSFlorian Hahn } 339203975b7fSFlorian Hahn 339303975b7fSFlorian Hahn // Create a phi node for the new recurrence. 339406c3a7d2SFlorian Hahn PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur"); 339506c3a7d2SFlorian Hahn Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt()); 339606c3a7d2SFlorian Hahn Phi->addIncoming(VectorInit, VectorPH); 339757f5d8f2SFlorian Hahn State.set(this, Phi); 339803975b7fSFlorian Hahn } 339903975b7fSFlorian Hahn 3400680901edSFlorian Hahn InstructionCost 3401680901edSFlorian Hahn VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF, 3402680901edSFlorian Hahn VPCostContext &Ctx) const { 340332003857SFlorian Hahn if (VF.isScalar()) 3404edf3a55bSJohn Brawn return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind); 340532003857SFlorian Hahn 3406680901edSFlorian Hahn if (VF.isScalable() && VF.getKnownMinValue() == 1) 3407680901edSFlorian Hahn return InstructionCost::getInvalid(); 3408680901edSFlorian Hahn 3409680901edSFlorian Hahn SmallVector<int> Mask(VF.getKnownMinValue()); 3410680901edSFlorian Hahn std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1); 3411680901edSFlorian Hahn Type *VectorTy = 34129ab5474eSBenjamin Maxwell toVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF); 3413680901edSFlorian Hahn 3414680901edSFlorian Hahn return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Splice, 3415edf3a55bSJohn Brawn cast<VectorType>(VectorTy), Mask, Ctx.CostKind, 3416680901edSFlorian Hahn VF.getKnownMinValue() - 1); 3417680901edSFlorian Hahn } 3418680901edSFlorian Hahn 341903975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 342003975b7fSFlorian Hahn void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent, 342103975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 342203975b7fSFlorian Hahn O << Indent << "FIRST-ORDER-RECURRENCE-PHI "; 342303975b7fSFlorian Hahn printAsOperand(O, SlotTracker); 342403975b7fSFlorian Hahn O << " = phi "; 342503975b7fSFlorian Hahn printOperands(O, SlotTracker); 342603975b7fSFlorian Hahn } 342703975b7fSFlorian Hahn #endif 342803975b7fSFlorian Hahn 342903975b7fSFlorian Hahn void VPReductionPHIRecipe::execute(VPTransformState &State) { 343003975b7fSFlorian Hahn auto &Builder = State.Builder; 343103975b7fSFlorian Hahn 3432795e35a6SSam Tebbs // If this phi is fed by a scaled reduction then it should output a 3433795e35a6SSam Tebbs // vector with fewer elements than the VF. 3434795e35a6SSam Tebbs ElementCount VF = State.VF.divideCoefficientBy(VFScaleFactor); 3435795e35a6SSam Tebbs 34366011d6b2SFlorian Hahn // Reductions do not have to start at zero. They can start with 34376011d6b2SFlorian Hahn // any loop invariant values. 34386011d6b2SFlorian Hahn VPValue *StartVPV = getStartValue(); 34396011d6b2SFlorian Hahn Value *StartV = StartVPV->getLiveInIRValue(); 34406011d6b2SFlorian Hahn 344103975b7fSFlorian Hahn // In order to support recurrences we need to be able to vectorize Phi nodes. 344203975b7fSFlorian Hahn // Phi nodes have cycles, so we need to vectorize them in two stages. This is 344303975b7fSFlorian Hahn // stage #1: We create a new vector PHI node with no incoming edges. We'll use 344403975b7fSFlorian Hahn // this value when we vectorize all of the instructions that use the PHI. 34455f096fd2SFlorian Hahn bool ScalarPHI = State.VF.isScalar() || IsInLoop; 3446795e35a6SSam Tebbs Type *VecTy = 3447795e35a6SSam Tebbs ScalarPHI ? StartV->getType() : VectorType::get(StartV->getType(), VF); 344803975b7fSFlorian Hahn 344903975b7fSFlorian Hahn BasicBlock *HeaderBB = State.CFG.PrevBB; 3450b06a45c6SFlorian Hahn assert(State.CurrentParentLoop->getHeader() == HeaderBB && 345103975b7fSFlorian Hahn "recipe must be in the vector loop header"); 345206c3a7d2SFlorian Hahn auto *Phi = PHINode::Create(VecTy, 2, "vec.phi"); 345306c3a7d2SFlorian Hahn Phi->insertBefore(HeaderBB->getFirstInsertionPt()); 345457f5d8f2SFlorian Hahn State.set(this, Phi, IsInLoop); 345503975b7fSFlorian Hahn 345603975b7fSFlorian Hahn BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 345703975b7fSFlorian Hahn 345803975b7fSFlorian Hahn Value *Iden = nullptr; 345903975b7fSFlorian Hahn RecurKind RK = RdxDesc.getRecurrenceKind(); 34608ec40675SFlorian Hahn unsigned CurrentPart = getUnrollPart(*this); 34618ec40675SFlorian Hahn 346203975b7fSFlorian Hahn if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) || 3463425e9e81SMel Chen RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) { 3464425e9e81SMel Chen // MinMax and AnyOf reductions have the start value as their identity. 346503975b7fSFlorian Hahn if (ScalarPHI) { 346603975b7fSFlorian Hahn Iden = StartV; 346703975b7fSFlorian Hahn } else { 346803975b7fSFlorian Hahn IRBuilderBase::InsertPointGuard IPBuilder(Builder); 346903975b7fSFlorian Hahn Builder.SetInsertPoint(VectorPH->getTerminator()); 347057f5d8f2SFlorian Hahn StartV = Iden = State.get(StartVPV); 347103975b7fSFlorian Hahn } 3472b3cba9beSMel Chen } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) { 3473b3cba9beSMel Chen // [I|F]FindLastIV will use a sentinel value to initialize the reduction 34740e528ac4SFlorian Hahn // phi or the resume value from the main vector loop when vectorizing the 34750e528ac4SFlorian Hahn // epilogue loop. In the exit block, ComputeReductionResult will generate 34760e528ac4SFlorian Hahn // checks to verify if the reduction result is the sentinel value. If the 34770e528ac4SFlorian Hahn // result is the sentinel value, it will be corrected back to the start 34780e528ac4SFlorian Hahn // value. 3479b3cba9beSMel Chen // TODO: The sentinel value is not always necessary. When the start value is 3480b3cba9beSMel Chen // a constant, and smaller than the start value of the induction variable, 3481b3cba9beSMel Chen // the start value can be directly used to initialize the reduction phi. 3482eb59fe8dSFlorian Hahn Iden = StartV; 3483b3cba9beSMel Chen if (!ScalarPHI) { 3484b3cba9beSMel Chen IRBuilderBase::InsertPointGuard IPBuilder(Builder); 3485b3cba9beSMel Chen Builder.SetInsertPoint(VectorPH->getTerminator()); 3486b3cba9beSMel Chen StartV = Iden = Builder.CreateVectorSplat(State.VF, Iden); 3487b3cba9beSMel Chen } 348803975b7fSFlorian Hahn } else { 34893d9abfc9SPhilip Reames Iden = llvm::getRecurrenceIdentity(RK, VecTy->getScalarType(), 349003975b7fSFlorian Hahn RdxDesc.getFastMathFlags()); 349103975b7fSFlorian Hahn 349203975b7fSFlorian Hahn if (!ScalarPHI) { 34938ec40675SFlorian Hahn if (CurrentPart == 0) { 34948ec40675SFlorian Hahn // Create start and identity vector values for the reduction in the 34958ec40675SFlorian Hahn // preheader. 34968ec40675SFlorian Hahn // TODO: Introduce recipes in VPlan preheader to create initial values. 3497795e35a6SSam Tebbs Iden = Builder.CreateVectorSplat(VF, Iden); 349803975b7fSFlorian Hahn IRBuilderBase::InsertPointGuard IPBuilder(Builder); 349903975b7fSFlorian Hahn Builder.SetInsertPoint(VectorPH->getTerminator()); 350003975b7fSFlorian Hahn Constant *Zero = Builder.getInt32(0); 350103975b7fSFlorian Hahn StartV = Builder.CreateInsertElement(Iden, StartV, Zero); 35028ec40675SFlorian Hahn } else { 3503795e35a6SSam Tebbs Iden = Builder.CreateVectorSplat(VF, Iden); 35048ec40675SFlorian Hahn } 350503975b7fSFlorian Hahn } 350603975b7fSFlorian Hahn } 350703975b7fSFlorian Hahn 350857f5d8f2SFlorian Hahn Phi = cast<PHINode>(State.get(this, IsInLoop)); 35098ec40675SFlorian Hahn Value *StartVal = (CurrentPart == 0) ? StartV : Iden; 351006c3a7d2SFlorian Hahn Phi->addIncoming(StartVal, VectorPH); 351103975b7fSFlorian Hahn } 351203975b7fSFlorian Hahn 351303975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 351403975b7fSFlorian Hahn void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent, 351503975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 351603975b7fSFlorian Hahn O << Indent << "WIDEN-REDUCTION-PHI "; 351703975b7fSFlorian Hahn 351803975b7fSFlorian Hahn printAsOperand(O, SlotTracker); 351903975b7fSFlorian Hahn O << " = phi "; 352003975b7fSFlorian Hahn printOperands(O, SlotTracker); 3521795e35a6SSam Tebbs if (VFScaleFactor != 1) 3522795e35a6SSam Tebbs O << " (VF scaled by 1/" << VFScaleFactor << ")"; 352303975b7fSFlorian Hahn } 352403975b7fSFlorian Hahn #endif 352503975b7fSFlorian Hahn 352603975b7fSFlorian Hahn void VPWidenPHIRecipe::execute(VPTransformState &State) { 352703975b7fSFlorian Hahn assert(EnableVPlanNativePath && 352803975b7fSFlorian Hahn "Non-native vplans are not expected to have VPWidenPHIRecipes."); 352903975b7fSFlorian Hahn 3530aff1242bSElvis Wang State.setDebugLocFrom(getDebugLoc()); 353157f5d8f2SFlorian Hahn Value *Op0 = State.get(getOperand(0)); 353203975b7fSFlorian Hahn Type *VecTy = Op0->getType(); 353303975b7fSFlorian Hahn Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi"); 353457f5d8f2SFlorian Hahn State.set(this, VecPhi); 353503975b7fSFlorian Hahn } 353603975b7fSFlorian Hahn 353703975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 353803975b7fSFlorian Hahn void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, 353903975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const { 354003975b7fSFlorian Hahn O << Indent << "WIDEN-PHI "; 354103975b7fSFlorian Hahn 354203975b7fSFlorian Hahn auto *OriginalPhi = cast<PHINode>(getUnderlyingValue()); 354303975b7fSFlorian Hahn // Unless all incoming values are modeled in VPlan print the original PHI 354403975b7fSFlorian Hahn // directly. 354503975b7fSFlorian Hahn // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming 354603975b7fSFlorian Hahn // values as VPValues. 354703975b7fSFlorian Hahn if (getNumOperands() != OriginalPhi->getNumOperands()) { 354803975b7fSFlorian Hahn O << VPlanIngredient(OriginalPhi); 354903975b7fSFlorian Hahn return; 355003975b7fSFlorian Hahn } 355103975b7fSFlorian Hahn 355203975b7fSFlorian Hahn printAsOperand(O, SlotTracker); 355303975b7fSFlorian Hahn O << " = phi "; 355403975b7fSFlorian Hahn printOperands(O, SlotTracker); 355503975b7fSFlorian Hahn } 355603975b7fSFlorian Hahn #endif 355703fee671SDavid Sherwood 355803fee671SDavid Sherwood // TODO: It would be good to use the existing VPWidenPHIRecipe instead and 355903fee671SDavid Sherwood // remove VPActiveLaneMaskPHIRecipe. 356003fee671SDavid Sherwood void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) { 356103fee671SDavid Sherwood BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 356257f5d8f2SFlorian Hahn Value *StartMask = State.get(getOperand(0)); 356306c3a7d2SFlorian Hahn PHINode *Phi = 356403fee671SDavid Sherwood State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask"); 356506c3a7d2SFlorian Hahn Phi->addIncoming(StartMask, VectorPH); 356606c3a7d2SFlorian Hahn Phi->setDebugLoc(getDebugLoc()); 356757f5d8f2SFlorian Hahn State.set(this, Phi); 356803fee671SDavid Sherwood } 356903fee671SDavid Sherwood 357003fee671SDavid Sherwood #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 357103fee671SDavid Sherwood void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent, 357203fee671SDavid Sherwood VPSlotTracker &SlotTracker) const { 357303fee671SDavid Sherwood O << Indent << "ACTIVE-LANE-MASK-PHI "; 357403fee671SDavid Sherwood 357503fee671SDavid Sherwood printAsOperand(O, SlotTracker); 357603fee671SDavid Sherwood O << " = phi "; 357703fee671SDavid Sherwood printOperands(O, SlotTracker); 357803fee671SDavid Sherwood } 357903fee671SDavid Sherwood #endif 3580413a66f3SAlexey Bataev 3581a7fda0e1SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3582a7fda0e1SFlorian Hahn void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, 3583a7fda0e1SFlorian Hahn VPSlotTracker &SlotTracker) const { 3584a7fda0e1SFlorian Hahn O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI "; 3585a7fda0e1SFlorian Hahn 3586a7fda0e1SFlorian Hahn printAsOperand(O, SlotTracker); 3587a7fda0e1SFlorian Hahn O << " = phi "; 3588a7fda0e1SFlorian Hahn printOperands(O, SlotTracker); 3589a7fda0e1SFlorian Hahn } 3590a7fda0e1SFlorian Hahn #endif 3591a7fda0e1SFlorian Hahn 3592a7fda0e1SFlorian Hahn void VPScalarPHIRecipe::execute(VPTransformState &State) { 3593413a66f3SAlexey Bataev BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 3594afef545eSFlorian Hahn Value *Start = State.get(getStartValue(), VPLane(0)); 3595a7fda0e1SFlorian Hahn PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name); 359606c3a7d2SFlorian Hahn Phi->addIncoming(Start, VectorPH); 359706c3a7d2SFlorian Hahn Phi->setDebugLoc(getDebugLoc()); 359857f5d8f2SFlorian Hahn State.set(this, Phi, /*IsScalar=*/true); 3599413a66f3SAlexey Bataev } 3600413a66f3SAlexey Bataev 3601413a66f3SAlexey Bataev #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3602a7fda0e1SFlorian Hahn void VPScalarPHIRecipe::print(raw_ostream &O, const Twine &Indent, 3603413a66f3SAlexey Bataev VPSlotTracker &SlotTracker) const { 3604a7fda0e1SFlorian Hahn O << Indent << "SCALAR-PHI "; 3605413a66f3SAlexey Bataev printAsOperand(O, SlotTracker); 3606413a66f3SAlexey Bataev O << " = phi "; 3607413a66f3SAlexey Bataev printOperands(O, SlotTracker); 3608413a66f3SAlexey Bataev } 3609413a66f3SAlexey Bataev #endif 3610