181ad6265SDimitry Andric //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===// 281ad6265SDimitry Andric // 381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 681ad6265SDimitry Andric // 781ad6265SDimitry Andric //===----------------------------------------------------------------------===// 881ad6265SDimitry Andric /// 981ad6265SDimitry Andric /// \file 1081ad6265SDimitry Andric /// This file contains implementations for different VPlan recipes. 1181ad6265SDimitry Andric /// 1281ad6265SDimitry Andric //===----------------------------------------------------------------------===// 1381ad6265SDimitry Andric 1481ad6265SDimitry Andric #include "VPlan.h" 155f757f3fSDimitry Andric #include "VPlanAnalysis.h" 1681ad6265SDimitry Andric #include "llvm/ADT/STLExtras.h" 1781ad6265SDimitry Andric #include "llvm/ADT/SmallVector.h" 1881ad6265SDimitry Andric #include "llvm/ADT/Twine.h" 1981ad6265SDimitry Andric #include "llvm/Analysis/IVDescriptors.h" 2081ad6265SDimitry Andric #include "llvm/IR/BasicBlock.h" 2181ad6265SDimitry Andric #include "llvm/IR/IRBuilder.h" 2281ad6265SDimitry Andric #include "llvm/IR/Instruction.h" 2381ad6265SDimitry Andric #include "llvm/IR/Instructions.h" 2481ad6265SDimitry Andric #include "llvm/IR/Type.h" 2581ad6265SDimitry Andric #include "llvm/IR/Value.h" 2681ad6265SDimitry Andric #include "llvm/Support/Casting.h" 2781ad6265SDimitry Andric #include "llvm/Support/CommandLine.h" 2881ad6265SDimitry Andric #include "llvm/Support/Debug.h" 2981ad6265SDimitry Andric #include "llvm/Support/raw_ostream.h" 30753f127fSDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 311db9f3b2SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h" 3281ad6265SDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 3381ad6265SDimitry Andric #include <cassert> 3481ad6265SDimitry Andric 3581ad6265SDimitry Andric using namespace llvm; 3681ad6265SDimitry Andric 37753f127fSDimitry Andric using VectorParts = SmallVector<Value *, 2>; 38753f127fSDimitry Andric 3906c3fb27SDimitry Andric namespace llvm { 4081ad6265SDimitry Andric extern cl::opt<bool> EnableVPlanNativePath; 4106c3fb27SDimitry Andric } 42*0fca6ea1SDimitry Andric extern cl::opt<unsigned> ForceTargetInstructionCost; 4381ad6265SDimitry Andric 44753f127fSDimitry Andric #define LV_NAME "loop-vectorize" 45753f127fSDimitry Andric #define DEBUG_TYPE LV_NAME 46753f127fSDimitry Andric 4781ad6265SDimitry Andric bool VPRecipeBase::mayWriteToMemory() const { 4881ad6265SDimitry Andric switch (getVPDefID()) { 495f757f3fSDimitry Andric case VPInterleaveSC: 505f757f3fSDimitry Andric return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0; 51*0fca6ea1SDimitry Andric case VPWidenStoreEVLSC: 52*0fca6ea1SDimitry Andric case VPWidenStoreSC: 53*0fca6ea1SDimitry Andric return true; 5481ad6265SDimitry Andric case VPReplicateSC: 5581ad6265SDimitry Andric return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) 5681ad6265SDimitry Andric ->mayWriteToMemory(); 57*0fca6ea1SDimitry Andric case VPWidenCallSC: 58*0fca6ea1SDimitry Andric return !cast<VPWidenCallRecipe>(this) 59*0fca6ea1SDimitry Andric ->getCalledScalarFunction() 60*0fca6ea1SDimitry Andric ->onlyReadsMemory(); 6181ad6265SDimitry Andric case VPBranchOnMaskSC: 62bdd1243dSDimitry Andric case VPScalarIVStepsSC: 6306c3fb27SDimitry Andric case VPPredInstPHISC: 6481ad6265SDimitry Andric return false; 6581ad6265SDimitry Andric case VPBlendSC: 66*0fca6ea1SDimitry Andric case VPReductionEVLSC: 6781ad6265SDimitry Andric case VPReductionSC: 6806c3fb27SDimitry Andric case VPWidenCanonicalIVSC: 6906c3fb27SDimitry Andric case VPWidenCastSC: 7006c3fb27SDimitry Andric case VPWidenGEPSC: 7106c3fb27SDimitry Andric case VPWidenIntOrFpInductionSC: 72*0fca6ea1SDimitry Andric case VPWidenLoadEVLSC: 73*0fca6ea1SDimitry Andric case VPWidenLoadSC: 7406c3fb27SDimitry Andric case VPWidenPHISC: 7506c3fb27SDimitry Andric case VPWidenSC: 7681ad6265SDimitry Andric case VPWidenSelectSC: { 7781ad6265SDimitry Andric const Instruction *I = 7881ad6265SDimitry Andric dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 7981ad6265SDimitry Andric (void)I; 8081ad6265SDimitry Andric assert((!I || !I->mayWriteToMemory()) && 8181ad6265SDimitry Andric "underlying instruction may write to memory"); 8281ad6265SDimitry Andric return false; 8381ad6265SDimitry Andric } 8481ad6265SDimitry Andric default: 8581ad6265SDimitry Andric return true; 8681ad6265SDimitry Andric } 8781ad6265SDimitry Andric } 8881ad6265SDimitry Andric 8981ad6265SDimitry Andric bool VPRecipeBase::mayReadFromMemory() const { 9081ad6265SDimitry Andric switch (getVPDefID()) { 91*0fca6ea1SDimitry Andric case VPWidenLoadEVLSC: 92*0fca6ea1SDimitry Andric case VPWidenLoadSC: 93*0fca6ea1SDimitry Andric return true; 9481ad6265SDimitry Andric case VPReplicateSC: 9581ad6265SDimitry Andric return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) 9681ad6265SDimitry Andric ->mayReadFromMemory(); 97*0fca6ea1SDimitry Andric case VPWidenCallSC: 98*0fca6ea1SDimitry Andric return !cast<VPWidenCallRecipe>(this) 99*0fca6ea1SDimitry Andric ->getCalledScalarFunction() 100*0fca6ea1SDimitry Andric ->onlyWritesMemory(); 10181ad6265SDimitry Andric case VPBranchOnMaskSC: 10206c3fb27SDimitry Andric case VPPredInstPHISC: 103*0fca6ea1SDimitry Andric case VPScalarIVStepsSC: 104*0fca6ea1SDimitry Andric case VPWidenStoreEVLSC: 105*0fca6ea1SDimitry Andric case VPWidenStoreSC: 10681ad6265SDimitry Andric return false; 10781ad6265SDimitry Andric case VPBlendSC: 108*0fca6ea1SDimitry Andric case VPReductionEVLSC: 10981ad6265SDimitry Andric case VPReductionSC: 11006c3fb27SDimitry Andric case VPWidenCanonicalIVSC: 11106c3fb27SDimitry Andric case VPWidenCastSC: 11206c3fb27SDimitry Andric case VPWidenGEPSC: 11306c3fb27SDimitry Andric case VPWidenIntOrFpInductionSC: 11406c3fb27SDimitry Andric case VPWidenPHISC: 11506c3fb27SDimitry Andric case VPWidenSC: 11681ad6265SDimitry Andric case VPWidenSelectSC: { 11781ad6265SDimitry Andric const Instruction *I = 11881ad6265SDimitry Andric dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 11981ad6265SDimitry Andric (void)I; 12081ad6265SDimitry Andric assert((!I || !I->mayReadFromMemory()) && 12181ad6265SDimitry Andric "underlying instruction may read from memory"); 12281ad6265SDimitry Andric return false; 12381ad6265SDimitry Andric } 12481ad6265SDimitry Andric default: 12581ad6265SDimitry Andric return true; 12681ad6265SDimitry Andric } 12781ad6265SDimitry Andric } 12881ad6265SDimitry Andric 12981ad6265SDimitry Andric bool VPRecipeBase::mayHaveSideEffects() const { 13081ad6265SDimitry Andric switch (getVPDefID()) { 131bdd1243dSDimitry Andric case VPDerivedIVSC: 132bdd1243dSDimitry Andric case VPPredInstPHISC: 133*0fca6ea1SDimitry Andric case VPScalarCastSC: 134bdd1243dSDimitry Andric return false; 1355f757f3fSDimitry Andric case VPInstructionSC: 1365f757f3fSDimitry Andric switch (cast<VPInstruction>(this)->getOpcode()) { 1371db9f3b2SDimitry Andric case Instruction::Or: 1385f757f3fSDimitry Andric case Instruction::ICmp: 1391db9f3b2SDimitry Andric case Instruction::Select: 1405f757f3fSDimitry Andric case VPInstruction::Not: 1415f757f3fSDimitry Andric case VPInstruction::CalculateTripCountMinusVF: 1425f757f3fSDimitry Andric case VPInstruction::CanonicalIVIncrementForPart: 143*0fca6ea1SDimitry Andric case VPInstruction::ExtractFromEnd: 144*0fca6ea1SDimitry Andric case VPInstruction::FirstOrderRecurrenceSplice: 145*0fca6ea1SDimitry Andric case VPInstruction::LogicalAnd: 146*0fca6ea1SDimitry Andric case VPInstruction::PtrAdd: 1475f757f3fSDimitry Andric return false; 1485f757f3fSDimitry Andric default: 1495f757f3fSDimitry Andric return true; 1505f757f3fSDimitry Andric } 151*0fca6ea1SDimitry Andric case VPWidenCallSC: { 152*0fca6ea1SDimitry Andric Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction(); 153*0fca6ea1SDimitry Andric return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn(); 154*0fca6ea1SDimitry Andric } 15581ad6265SDimitry Andric case VPBlendSC: 156*0fca6ea1SDimitry Andric case VPReductionEVLSC: 15781ad6265SDimitry Andric case VPReductionSC: 15806c3fb27SDimitry Andric case VPScalarIVStepsSC: 15906c3fb27SDimitry Andric case VPWidenCanonicalIVSC: 16006c3fb27SDimitry Andric case VPWidenCastSC: 16106c3fb27SDimitry Andric case VPWidenGEPSC: 16206c3fb27SDimitry Andric case VPWidenIntOrFpInductionSC: 16306c3fb27SDimitry Andric case VPWidenPHISC: 16406c3fb27SDimitry Andric case VPWidenPointerInductionSC: 16506c3fb27SDimitry Andric case VPWidenSC: 16606c3fb27SDimitry Andric case VPWidenSelectSC: { 16781ad6265SDimitry Andric const Instruction *I = 16881ad6265SDimitry Andric dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 16981ad6265SDimitry Andric (void)I; 17081ad6265SDimitry Andric assert((!I || !I->mayHaveSideEffects()) && 17181ad6265SDimitry Andric "underlying instruction has side-effects"); 17281ad6265SDimitry Andric return false; 17381ad6265SDimitry Andric } 1745f757f3fSDimitry Andric case VPInterleaveSC: 1755f757f3fSDimitry Andric return mayWriteToMemory(); 176*0fca6ea1SDimitry Andric case VPWidenLoadEVLSC: 177*0fca6ea1SDimitry Andric case VPWidenLoadSC: 178*0fca6ea1SDimitry Andric case VPWidenStoreEVLSC: 179*0fca6ea1SDimitry Andric case VPWidenStoreSC: 180*0fca6ea1SDimitry Andric assert( 181*0fca6ea1SDimitry Andric cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() == 182*0fca6ea1SDimitry Andric mayWriteToMemory() && 18306c3fb27SDimitry Andric "mayHaveSideffects result for ingredient differs from this " 18406c3fb27SDimitry Andric "implementation"); 18506c3fb27SDimitry Andric return mayWriteToMemory(); 18681ad6265SDimitry Andric case VPReplicateSC: { 18781ad6265SDimitry Andric auto *R = cast<VPReplicateRecipe>(this); 18881ad6265SDimitry Andric return R->getUnderlyingInstr()->mayHaveSideEffects(); 18981ad6265SDimitry Andric } 19081ad6265SDimitry Andric default: 19181ad6265SDimitry Andric return true; 19281ad6265SDimitry Andric } 19381ad6265SDimitry Andric } 19481ad6265SDimitry Andric 19581ad6265SDimitry Andric void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { 19681ad6265SDimitry Andric VPValue *ExitValue = getOperand(0); 197*0fca6ea1SDimitry Andric auto Lane = vputils::isUniformAfterVectorization(ExitValue) 198*0fca6ea1SDimitry Andric ? VPLane::getFirstLane() 199*0fca6ea1SDimitry Andric : VPLane::getLastLaneForVF(State.VF); 2005f757f3fSDimitry Andric VPBasicBlock *MiddleVPBB = 2015f757f3fSDimitry Andric cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor()); 202*0fca6ea1SDimitry Andric VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe(); 203*0fca6ea1SDimitry Andric auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr; 204*0fca6ea1SDimitry Andric // Values leaving the vector loop reach live out phi's in the exiting block 205*0fca6ea1SDimitry Andric // via middle block. 206*0fca6ea1SDimitry Andric auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion() 207*0fca6ea1SDimitry Andric ? MiddleVPBB 208*0fca6ea1SDimitry Andric : ExitingVPBB; 209*0fca6ea1SDimitry Andric BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; 210*0fca6ea1SDimitry Andric // Set insertion point in PredBB in case an extract needs to be generated. 211*0fca6ea1SDimitry Andric // TODO: Model extracts explicitly. 212*0fca6ea1SDimitry Andric State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt()); 213*0fca6ea1SDimitry Andric Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane)); 214*0fca6ea1SDimitry Andric if (Phi->getBasicBlockIndex(PredBB) != -1) 215*0fca6ea1SDimitry Andric Phi->setIncomingValueForBlock(PredBB, V); 216*0fca6ea1SDimitry Andric else 217*0fca6ea1SDimitry Andric Phi->addIncoming(V, PredBB); 21881ad6265SDimitry Andric } 21981ad6265SDimitry Andric 22006c3fb27SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 22106c3fb27SDimitry Andric void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const { 22206c3fb27SDimitry Andric O << "Live-out "; 22306c3fb27SDimitry Andric getPhi()->printAsOperand(O); 22406c3fb27SDimitry Andric O << " = "; 22506c3fb27SDimitry Andric getOperand(0)->printAsOperand(O, SlotTracker); 22606c3fb27SDimitry Andric O << "\n"; 22706c3fb27SDimitry Andric } 22806c3fb27SDimitry Andric #endif 22906c3fb27SDimitry Andric 23081ad6265SDimitry Andric void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { 23181ad6265SDimitry Andric assert(!Parent && "Recipe already in some VPBasicBlock"); 23281ad6265SDimitry Andric assert(InsertPos->getParent() && 23381ad6265SDimitry Andric "Insertion position not in any VPBasicBlock"); 234*0fca6ea1SDimitry Andric InsertPos->getParent()->insert(this, InsertPos->getIterator()); 23581ad6265SDimitry Andric } 23681ad6265SDimitry Andric 23781ad6265SDimitry Andric void VPRecipeBase::insertBefore(VPBasicBlock &BB, 23881ad6265SDimitry Andric iplist<VPRecipeBase>::iterator I) { 23981ad6265SDimitry Andric assert(!Parent && "Recipe already in some VPBasicBlock"); 24081ad6265SDimitry Andric assert(I == BB.end() || I->getParent() == &BB); 241*0fca6ea1SDimitry Andric BB.insert(this, I); 24281ad6265SDimitry Andric } 24381ad6265SDimitry Andric 24481ad6265SDimitry Andric void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) { 24581ad6265SDimitry Andric assert(!Parent && "Recipe already in some VPBasicBlock"); 24681ad6265SDimitry Andric assert(InsertPos->getParent() && 24781ad6265SDimitry Andric "Insertion position not in any VPBasicBlock"); 248*0fca6ea1SDimitry Andric InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator())); 24981ad6265SDimitry Andric } 25081ad6265SDimitry Andric 25181ad6265SDimitry Andric void VPRecipeBase::removeFromParent() { 25281ad6265SDimitry Andric assert(getParent() && "Recipe not in any VPBasicBlock"); 25381ad6265SDimitry Andric getParent()->getRecipeList().remove(getIterator()); 25481ad6265SDimitry Andric Parent = nullptr; 25581ad6265SDimitry Andric } 25681ad6265SDimitry Andric 25781ad6265SDimitry Andric iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() { 25881ad6265SDimitry Andric assert(getParent() && "Recipe not in any VPBasicBlock"); 25981ad6265SDimitry Andric return getParent()->getRecipeList().erase(getIterator()); 26081ad6265SDimitry Andric } 26181ad6265SDimitry Andric 26281ad6265SDimitry Andric void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { 26381ad6265SDimitry Andric removeFromParent(); 26481ad6265SDimitry Andric insertAfter(InsertPos); 26581ad6265SDimitry Andric } 26681ad6265SDimitry Andric 26781ad6265SDimitry Andric void VPRecipeBase::moveBefore(VPBasicBlock &BB, 26881ad6265SDimitry Andric iplist<VPRecipeBase>::iterator I) { 26981ad6265SDimitry Andric removeFromParent(); 27081ad6265SDimitry Andric insertBefore(BB, I); 27181ad6265SDimitry Andric } 27281ad6265SDimitry Andric 273*0fca6ea1SDimitry Andric /// Return the underlying instruction to be used for computing \p R's cost via 274*0fca6ea1SDimitry Andric /// the legacy cost model. Return nullptr if there's no suitable instruction. 275*0fca6ea1SDimitry Andric static Instruction *getInstructionForCost(const VPRecipeBase *R) { 276*0fca6ea1SDimitry Andric if (auto *S = dyn_cast<VPSingleDefRecipe>(R)) 277*0fca6ea1SDimitry Andric return dyn_cast_or_null<Instruction>(S->getUnderlyingValue()); 278*0fca6ea1SDimitry Andric if (auto *IG = dyn_cast<VPInterleaveRecipe>(R)) 279*0fca6ea1SDimitry Andric return IG->getInsertPos(); 280*0fca6ea1SDimitry Andric if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(R)) 281*0fca6ea1SDimitry Andric return &WidenMem->getIngredient(); 282*0fca6ea1SDimitry Andric return nullptr; 283*0fca6ea1SDimitry Andric } 284*0fca6ea1SDimitry Andric 285*0fca6ea1SDimitry Andric InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) { 286*0fca6ea1SDimitry Andric if (auto *UI = getInstructionForCost(this)) 287*0fca6ea1SDimitry Andric if (Ctx.skipCostComputation(UI, VF.isVector())) 288*0fca6ea1SDimitry Andric return 0; 289*0fca6ea1SDimitry Andric 290*0fca6ea1SDimitry Andric InstructionCost RecipeCost = computeCost(VF, Ctx); 291*0fca6ea1SDimitry Andric if (ForceTargetInstructionCost.getNumOccurrences() > 0 && 292*0fca6ea1SDimitry Andric RecipeCost.isValid()) 293*0fca6ea1SDimitry Andric RecipeCost = InstructionCost(ForceTargetInstructionCost); 294*0fca6ea1SDimitry Andric 295*0fca6ea1SDimitry Andric LLVM_DEBUG({ 296*0fca6ea1SDimitry Andric dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": "; 297*0fca6ea1SDimitry Andric dump(); 298*0fca6ea1SDimitry Andric }); 299*0fca6ea1SDimitry Andric return RecipeCost; 300*0fca6ea1SDimitry Andric } 301*0fca6ea1SDimitry Andric 302*0fca6ea1SDimitry Andric InstructionCost VPRecipeBase::computeCost(ElementCount VF, 303*0fca6ea1SDimitry Andric VPCostContext &Ctx) const { 304*0fca6ea1SDimitry Andric // Compute the cost for the recipe falling back to the legacy cost model using 305*0fca6ea1SDimitry Andric // the underlying instruction. If there is no underlying instruction, returns 306*0fca6ea1SDimitry Andric // 0. 307*0fca6ea1SDimitry Andric Instruction *UI = getInstructionForCost(this); 308*0fca6ea1SDimitry Andric if (UI && isa<VPReplicateRecipe>(this)) { 309*0fca6ea1SDimitry Andric // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan 310*0fca6ea1SDimitry Andric // transform, avoid computing their cost multiple times for now. 311*0fca6ea1SDimitry Andric Ctx.SkipCostComputation.insert(UI); 312*0fca6ea1SDimitry Andric } 313*0fca6ea1SDimitry Andric return UI ? Ctx.getLegacyCost(UI, VF) : 0; 314*0fca6ea1SDimitry Andric } 315*0fca6ea1SDimitry Andric 3165f757f3fSDimitry Andric FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const { 3175f757f3fSDimitry Andric assert(OpType == OperationType::FPMathOp && 3185f757f3fSDimitry Andric "recipe doesn't have fast math flags"); 3195f757f3fSDimitry Andric FastMathFlags Res; 3205f757f3fSDimitry Andric Res.setAllowReassoc(FMFs.AllowReassoc); 3215f757f3fSDimitry Andric Res.setNoNaNs(FMFs.NoNaNs); 3225f757f3fSDimitry Andric Res.setNoInfs(FMFs.NoInfs); 3235f757f3fSDimitry Andric Res.setNoSignedZeros(FMFs.NoSignedZeros); 3245f757f3fSDimitry Andric Res.setAllowReciprocal(FMFs.AllowReciprocal); 3255f757f3fSDimitry Andric Res.setAllowContract(FMFs.AllowContract); 3265f757f3fSDimitry Andric Res.setApproxFunc(FMFs.ApproxFunc); 3275f757f3fSDimitry Andric return Res; 3285f757f3fSDimitry Andric } 3295f757f3fSDimitry Andric 3305f757f3fSDimitry Andric VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, 3315f757f3fSDimitry Andric VPValue *A, VPValue *B, DebugLoc DL, 3325f757f3fSDimitry Andric const Twine &Name) 3335f757f3fSDimitry Andric : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}), 3345f757f3fSDimitry Andric Pred, DL), 3357a6dacacSDimitry Andric Opcode(Opcode), Name(Name.str()) { 3365f757f3fSDimitry Andric assert(Opcode == Instruction::ICmp && 3375f757f3fSDimitry Andric "only ICmp predicates supported at the moment"); 3385f757f3fSDimitry Andric } 3395f757f3fSDimitry Andric 3405f757f3fSDimitry Andric VPInstruction::VPInstruction(unsigned Opcode, 3415f757f3fSDimitry Andric std::initializer_list<VPValue *> Operands, 3425f757f3fSDimitry Andric FastMathFlags FMFs, DebugLoc DL, const Twine &Name) 3435f757f3fSDimitry Andric : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL), 3447a6dacacSDimitry Andric Opcode(Opcode), Name(Name.str()) { 3455f757f3fSDimitry Andric // Make sure the VPInstruction is a floating-point operation. 3465f757f3fSDimitry Andric assert(isFPMathOp() && "this op can't take fast-math flags"); 3475f757f3fSDimitry Andric } 3485f757f3fSDimitry Andric 349*0fca6ea1SDimitry Andric bool VPInstruction::doesGeneratePerAllLanes() const { 350*0fca6ea1SDimitry Andric return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this); 351*0fca6ea1SDimitry Andric } 352*0fca6ea1SDimitry Andric 353*0fca6ea1SDimitry Andric bool VPInstruction::canGenerateScalarForFirstLane() const { 354*0fca6ea1SDimitry Andric if (Instruction::isBinaryOp(getOpcode())) 355*0fca6ea1SDimitry Andric return true; 356*0fca6ea1SDimitry Andric if (isSingleScalar() || isVectorToScalar()) 357*0fca6ea1SDimitry Andric return true; 358*0fca6ea1SDimitry Andric switch (Opcode) { 359*0fca6ea1SDimitry Andric case Instruction::ICmp: 360*0fca6ea1SDimitry Andric case VPInstruction::BranchOnCond: 361*0fca6ea1SDimitry Andric case VPInstruction::BranchOnCount: 362*0fca6ea1SDimitry Andric case VPInstruction::CalculateTripCountMinusVF: 363*0fca6ea1SDimitry Andric case VPInstruction::CanonicalIVIncrementForPart: 364*0fca6ea1SDimitry Andric case VPInstruction::PtrAdd: 365*0fca6ea1SDimitry Andric case VPInstruction::ExplicitVectorLength: 366*0fca6ea1SDimitry Andric return true; 367*0fca6ea1SDimitry Andric default: 368*0fca6ea1SDimitry Andric return false; 369*0fca6ea1SDimitry Andric } 370*0fca6ea1SDimitry Andric } 371*0fca6ea1SDimitry Andric 372*0fca6ea1SDimitry Andric Value *VPInstruction::generatePerLane(VPTransformState &State, 373*0fca6ea1SDimitry Andric const VPIteration &Lane) { 37481ad6265SDimitry Andric IRBuilderBase &Builder = State.Builder; 375*0fca6ea1SDimitry Andric 376*0fca6ea1SDimitry Andric assert(getOpcode() == VPInstruction::PtrAdd && 377*0fca6ea1SDimitry Andric "only PtrAdd opcodes are supported for now"); 378*0fca6ea1SDimitry Andric return Builder.CreatePtrAdd(State.get(getOperand(0), Lane), 379*0fca6ea1SDimitry Andric State.get(getOperand(1), Lane), Name); 380*0fca6ea1SDimitry Andric } 381*0fca6ea1SDimitry Andric 382*0fca6ea1SDimitry Andric Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { 383*0fca6ea1SDimitry Andric IRBuilderBase &Builder = State.Builder; 38481ad6265SDimitry Andric 38581ad6265SDimitry Andric if (Instruction::isBinaryOp(getOpcode())) { 386*0fca6ea1SDimitry Andric bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); 387*0fca6ea1SDimitry Andric Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed); 388*0fca6ea1SDimitry Andric Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed); 3895f757f3fSDimitry Andric auto *Res = 3905f757f3fSDimitry Andric Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name); 3915f757f3fSDimitry Andric if (auto *I = dyn_cast<Instruction>(Res)) 3925f757f3fSDimitry Andric setFlags(I); 3935f757f3fSDimitry Andric return Res; 39481ad6265SDimitry Andric } 39581ad6265SDimitry Andric 39681ad6265SDimitry Andric switch (getOpcode()) { 39781ad6265SDimitry Andric case VPInstruction::Not: { 39881ad6265SDimitry Andric Value *A = State.get(getOperand(0), Part); 39906c3fb27SDimitry Andric return Builder.CreateNot(A, Name); 40081ad6265SDimitry Andric } 4015f757f3fSDimitry Andric case Instruction::ICmp: { 402*0fca6ea1SDimitry Andric bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); 403*0fca6ea1SDimitry Andric Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed); 404*0fca6ea1SDimitry Andric Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed); 4055f757f3fSDimitry Andric return Builder.CreateCmp(getPredicate(), A, B, Name); 40681ad6265SDimitry Andric } 40781ad6265SDimitry Andric case Instruction::Select: { 40881ad6265SDimitry Andric Value *Cond = State.get(getOperand(0), Part); 40981ad6265SDimitry Andric Value *Op1 = State.get(getOperand(1), Part); 41081ad6265SDimitry Andric Value *Op2 = State.get(getOperand(2), Part); 41106c3fb27SDimitry Andric return Builder.CreateSelect(Cond, Op1, Op2, Name); 41281ad6265SDimitry Andric } 41381ad6265SDimitry Andric case VPInstruction::ActiveLaneMask: { 41481ad6265SDimitry Andric // Get first lane of vector induction variable. 41581ad6265SDimitry Andric Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0)); 41681ad6265SDimitry Andric // Get the original loop tripcount. 41706c3fb27SDimitry Andric Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0)); 41881ad6265SDimitry Andric 419*0fca6ea1SDimitry Andric // If this part of the active lane mask is scalar, generate the CMP directly 420*0fca6ea1SDimitry Andric // to avoid unnecessary extracts. 421*0fca6ea1SDimitry Andric if (State.VF.isScalar()) 422*0fca6ea1SDimitry Andric return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC, 423*0fca6ea1SDimitry Andric Name); 424*0fca6ea1SDimitry Andric 42581ad6265SDimitry Andric auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); 42681ad6265SDimitry Andric auto *PredTy = VectorType::get(Int1Ty, State.VF); 42706c3fb27SDimitry Andric return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask, 42806c3fb27SDimitry Andric {PredTy, ScalarTC->getType()}, 429753f127fSDimitry Andric {VIVElem0, ScalarTC}, nullptr, Name); 43081ad6265SDimitry Andric } 43181ad6265SDimitry Andric case VPInstruction::FirstOrderRecurrenceSplice: { 43281ad6265SDimitry Andric // Generate code to combine the previous and current values in vector v3. 43381ad6265SDimitry Andric // 43481ad6265SDimitry Andric // vector.ph: 43581ad6265SDimitry Andric // v_init = vector(..., ..., ..., a[-1]) 43681ad6265SDimitry Andric // br vector.body 43781ad6265SDimitry Andric // 43881ad6265SDimitry Andric // vector.body 43981ad6265SDimitry Andric // i = phi [0, vector.ph], [i+4, vector.body] 44081ad6265SDimitry Andric // v1 = phi [v_init, vector.ph], [v2, vector.body] 44181ad6265SDimitry Andric // v2 = a[i, i+1, i+2, i+3]; 44281ad6265SDimitry Andric // v3 = vector(v1(3), v2(0, 1, 2)) 44381ad6265SDimitry Andric 44481ad6265SDimitry Andric // For the first part, use the recurrence phi (v1), otherwise v2. 44581ad6265SDimitry Andric auto *V1 = State.get(getOperand(0), 0); 44681ad6265SDimitry Andric Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1); 44706c3fb27SDimitry Andric if (!PartMinus1->getType()->isVectorTy()) 44806c3fb27SDimitry Andric return PartMinus1; 44981ad6265SDimitry Andric Value *V2 = State.get(getOperand(1), Part); 45006c3fb27SDimitry Andric return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name); 45181ad6265SDimitry Andric } 45206c3fb27SDimitry Andric case VPInstruction::CalculateTripCountMinusVF: { 453*0fca6ea1SDimitry Andric if (Part != 0) 454*0fca6ea1SDimitry Andric return State.get(this, 0, /*IsScalar*/ true); 455*0fca6ea1SDimitry Andric 45606c3fb27SDimitry Andric Value *ScalarTC = State.get(getOperand(0), {0, 0}); 45706c3fb27SDimitry Andric Value *Step = 45806c3fb27SDimitry Andric createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF); 45906c3fb27SDimitry Andric Value *Sub = Builder.CreateSub(ScalarTC, Step); 46006c3fb27SDimitry Andric Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step); 46106c3fb27SDimitry Andric Value *Zero = ConstantInt::get(ScalarTC->getType(), 0); 46206c3fb27SDimitry Andric return Builder.CreateSelect(Cmp, Sub, Zero); 46381ad6265SDimitry Andric } 464*0fca6ea1SDimitry Andric case VPInstruction::ExplicitVectorLength: { 465*0fca6ea1SDimitry Andric // Compute EVL 466*0fca6ea1SDimitry Andric auto GetEVL = [=](VPTransformState &State, Value *AVL) { 467*0fca6ea1SDimitry Andric assert(AVL->getType()->isIntegerTy() && 468*0fca6ea1SDimitry Andric "Requested vector length should be an integer."); 469*0fca6ea1SDimitry Andric 470*0fca6ea1SDimitry Andric // TODO: Add support for MaxSafeDist for correct loop emission. 471*0fca6ea1SDimitry Andric assert(State.VF.isScalable() && "Expected scalable vector factor."); 472*0fca6ea1SDimitry Andric Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue()); 473*0fca6ea1SDimitry Andric 474*0fca6ea1SDimitry Andric Value *EVL = State.Builder.CreateIntrinsic( 475*0fca6ea1SDimitry Andric State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length, 476*0fca6ea1SDimitry Andric {AVL, VFArg, State.Builder.getTrue()}); 477*0fca6ea1SDimitry Andric return EVL; 478*0fca6ea1SDimitry Andric }; 479*0fca6ea1SDimitry Andric // TODO: Restructure this code with an explicit remainder loop, vsetvli can 480*0fca6ea1SDimitry Andric // be outside of the main loop. 481*0fca6ea1SDimitry Andric assert(Part == 0 && "No unrolling expected for predicated vectorization."); 482*0fca6ea1SDimitry Andric // Compute VTC - IV as the AVL (requested vector length). 483*0fca6ea1SDimitry Andric Value *Index = State.get(getOperand(0), VPIteration(0, 0)); 484*0fca6ea1SDimitry Andric Value *TripCount = State.get(getOperand(1), VPIteration(0, 0)); 485*0fca6ea1SDimitry Andric Value *AVL = State.Builder.CreateSub(TripCount, Index); 486*0fca6ea1SDimitry Andric Value *EVL = GetEVL(State, AVL); 487*0fca6ea1SDimitry Andric return EVL; 488*0fca6ea1SDimitry Andric } 4895f757f3fSDimitry Andric case VPInstruction::CanonicalIVIncrementForPart: { 490753f127fSDimitry Andric auto *IV = State.get(getOperand(0), VPIteration(0, 0)); 49106c3fb27SDimitry Andric if (Part == 0) 49206c3fb27SDimitry Andric return IV; 493753f127fSDimitry Andric 494753f127fSDimitry Andric // The canonical IV is incremented by the vectorization factor (num of SIMD 495753f127fSDimitry Andric // elements) times the unroll part. 496753f127fSDimitry Andric Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part); 4975f757f3fSDimitry Andric return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(), 4985f757f3fSDimitry Andric hasNoSignedWrap()); 499753f127fSDimitry Andric } 50081ad6265SDimitry Andric case VPInstruction::BranchOnCond: { 50181ad6265SDimitry Andric if (Part != 0) 50206c3fb27SDimitry Andric return nullptr; 50381ad6265SDimitry Andric 50481ad6265SDimitry Andric Value *Cond = State.get(getOperand(0), VPIteration(Part, 0)); 50581ad6265SDimitry Andric // Replace the temporary unreachable terminator with a new conditional 50681ad6265SDimitry Andric // branch, hooking it up to backward destination for exiting blocks now and 50781ad6265SDimitry Andric // to forward destination(s) later when they are created. 50881ad6265SDimitry Andric BranchInst *CondBr = 50981ad6265SDimitry Andric Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr); 51081ad6265SDimitry Andric CondBr->setSuccessor(0, nullptr); 51181ad6265SDimitry Andric Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); 512*0fca6ea1SDimitry Andric 513*0fca6ea1SDimitry Andric if (!getParent()->isExiting()) 514*0fca6ea1SDimitry Andric return CondBr; 515*0fca6ea1SDimitry Andric 516*0fca6ea1SDimitry Andric VPRegionBlock *ParentRegion = getParent()->getParent(); 517*0fca6ea1SDimitry Andric VPBasicBlock *Header = ParentRegion->getEntryBasicBlock(); 518*0fca6ea1SDimitry Andric CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]); 51906c3fb27SDimitry Andric return CondBr; 52081ad6265SDimitry Andric } 52181ad6265SDimitry Andric case VPInstruction::BranchOnCount: { 52281ad6265SDimitry Andric if (Part != 0) 52306c3fb27SDimitry Andric return nullptr; 52481ad6265SDimitry Andric // First create the compare. 525*0fca6ea1SDimitry Andric Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true); 526*0fca6ea1SDimitry Andric Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true); 52781ad6265SDimitry Andric Value *Cond = Builder.CreateICmpEQ(IV, TC); 52881ad6265SDimitry Andric 52981ad6265SDimitry Andric // Now create the branch. 53081ad6265SDimitry Andric auto *Plan = getParent()->getPlan(); 53181ad6265SDimitry Andric VPRegionBlock *TopRegion = Plan->getVectorLoopRegion(); 53281ad6265SDimitry Andric VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock(); 53381ad6265SDimitry Andric 53481ad6265SDimitry Andric // Replace the temporary unreachable terminator with a new conditional 53581ad6265SDimitry Andric // branch, hooking it up to backward destination (the header) now and to the 53681ad6265SDimitry Andric // forward destination (the exit/middle block) later when it is created. 53781ad6265SDimitry Andric // Note that CreateCondBr expects a valid BB as first argument, so we need 53881ad6265SDimitry Andric // to set it to nullptr later. 53981ad6265SDimitry Andric BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), 54081ad6265SDimitry Andric State.CFG.VPBB2IRBB[Header]); 54181ad6265SDimitry Andric CondBr->setSuccessor(0, nullptr); 54281ad6265SDimitry Andric Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); 54306c3fb27SDimitry Andric return CondBr; 54481ad6265SDimitry Andric } 5451db9f3b2SDimitry Andric case VPInstruction::ComputeReductionResult: { 5461db9f3b2SDimitry Andric if (Part != 0) 547*0fca6ea1SDimitry Andric return State.get(this, 0, /*IsScalar*/ true); 5481db9f3b2SDimitry Andric 5491db9f3b2SDimitry Andric // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary 5501db9f3b2SDimitry Andric // and will be removed by breaking up the recipe further. 5511db9f3b2SDimitry Andric auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0)); 5521db9f3b2SDimitry Andric auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue()); 5531db9f3b2SDimitry Andric // Get its reduction variable descriptor. 5541db9f3b2SDimitry Andric const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); 5551db9f3b2SDimitry Andric 5561db9f3b2SDimitry Andric RecurKind RK = RdxDesc.getRecurrenceKind(); 5571db9f3b2SDimitry Andric 5581db9f3b2SDimitry Andric VPValue *LoopExitingDef = getOperand(1); 5591db9f3b2SDimitry Andric Type *PhiTy = OrigPhi->getType(); 5601db9f3b2SDimitry Andric VectorParts RdxParts(State.UF); 5611db9f3b2SDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) 562*0fca6ea1SDimitry Andric RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop()); 5631db9f3b2SDimitry Andric 5641db9f3b2SDimitry Andric // If the vector reduction can be performed in a smaller type, we truncate 5651db9f3b2SDimitry Andric // then extend the loop exit value to enable InstCombine to evaluate the 5661db9f3b2SDimitry Andric // entire expression in the smaller type. 5671db9f3b2SDimitry Andric // TODO: Handle this in truncateToMinBW. 5681db9f3b2SDimitry Andric if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { 5691db9f3b2SDimitry Andric Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF); 5701db9f3b2SDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) 5711db9f3b2SDimitry Andric RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); 5721db9f3b2SDimitry Andric } 5731db9f3b2SDimitry Andric // Reduce all of the unrolled parts into a single vector. 5741db9f3b2SDimitry Andric Value *ReducedPartRdx = RdxParts[0]; 5751db9f3b2SDimitry Andric unsigned Op = RecurrenceDescriptor::getOpcode(RK); 576*0fca6ea1SDimitry Andric if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) 577*0fca6ea1SDimitry Andric Op = Instruction::Or; 5781db9f3b2SDimitry Andric 5791db9f3b2SDimitry Andric if (PhiR->isOrdered()) { 5801db9f3b2SDimitry Andric ReducedPartRdx = RdxParts[State.UF - 1]; 5811db9f3b2SDimitry Andric } else { 5821db9f3b2SDimitry Andric // Floating-point operations should have some FMF to enable the reduction. 5831db9f3b2SDimitry Andric IRBuilderBase::FastMathFlagGuard FMFG(Builder); 5841db9f3b2SDimitry Andric Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); 5851db9f3b2SDimitry Andric for (unsigned Part = 1; Part < State.UF; ++Part) { 5861db9f3b2SDimitry Andric Value *RdxPart = RdxParts[Part]; 5871db9f3b2SDimitry Andric if (Op != Instruction::ICmp && Op != Instruction::FCmp) 5881db9f3b2SDimitry Andric ReducedPartRdx = Builder.CreateBinOp( 5891db9f3b2SDimitry Andric (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx"); 590*0fca6ea1SDimitry Andric else 5911db9f3b2SDimitry Andric ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart); 5921db9f3b2SDimitry Andric } 5931db9f3b2SDimitry Andric } 5941db9f3b2SDimitry Andric 5951db9f3b2SDimitry Andric // Create the reduction after the loop. Note that inloop reductions create 5961db9f3b2SDimitry Andric // the target reduction in the loop using a Reduction recipe. 597*0fca6ea1SDimitry Andric if ((State.VF.isVector() || 598*0fca6ea1SDimitry Andric RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) && 599*0fca6ea1SDimitry Andric !PhiR->isInLoop()) { 6001db9f3b2SDimitry Andric ReducedPartRdx = 6011db9f3b2SDimitry Andric createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi); 6021db9f3b2SDimitry Andric // If the reduction can be performed in a smaller type, we need to extend 6031db9f3b2SDimitry Andric // the reduction to the wider type before we branch to the original loop. 6041db9f3b2SDimitry Andric if (PhiTy != RdxDesc.getRecurrenceType()) 6051db9f3b2SDimitry Andric ReducedPartRdx = RdxDesc.isSigned() 6061db9f3b2SDimitry Andric ? Builder.CreateSExt(ReducedPartRdx, PhiTy) 6071db9f3b2SDimitry Andric : Builder.CreateZExt(ReducedPartRdx, PhiTy); 6081db9f3b2SDimitry Andric } 6091db9f3b2SDimitry Andric 6101db9f3b2SDimitry Andric // If there were stores of the reduction value to a uniform memory address 6111db9f3b2SDimitry Andric // inside the loop, create the final store here. 6121db9f3b2SDimitry Andric if (StoreInst *SI = RdxDesc.IntermediateStore) { 6131db9f3b2SDimitry Andric auto *NewSI = Builder.CreateAlignedStore( 6141db9f3b2SDimitry Andric ReducedPartRdx, SI->getPointerOperand(), SI->getAlign()); 6151db9f3b2SDimitry Andric propagateMetadata(NewSI, SI); 6161db9f3b2SDimitry Andric } 6171db9f3b2SDimitry Andric 6181db9f3b2SDimitry Andric return ReducedPartRdx; 6191db9f3b2SDimitry Andric } 620*0fca6ea1SDimitry Andric case VPInstruction::ExtractFromEnd: { 621*0fca6ea1SDimitry Andric if (Part != 0) 622*0fca6ea1SDimitry Andric return State.get(this, 0, /*IsScalar*/ true); 623*0fca6ea1SDimitry Andric 624*0fca6ea1SDimitry Andric auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue()); 625*0fca6ea1SDimitry Andric unsigned Offset = CI->getZExtValue(); 626*0fca6ea1SDimitry Andric assert(Offset > 0 && "Offset from end must be positive"); 627*0fca6ea1SDimitry Andric Value *Res; 628*0fca6ea1SDimitry Andric if (State.VF.isVector()) { 629*0fca6ea1SDimitry Andric assert(Offset <= State.VF.getKnownMinValue() && 630*0fca6ea1SDimitry Andric "invalid offset to extract from"); 631*0fca6ea1SDimitry Andric // Extract lane VF - Offset from the operand. 632*0fca6ea1SDimitry Andric Res = State.get( 633*0fca6ea1SDimitry Andric getOperand(0), 634*0fca6ea1SDimitry Andric VPIteration(State.UF - 1, VPLane::getLaneFromEnd(State.VF, Offset))); 635*0fca6ea1SDimitry Andric } else { 636*0fca6ea1SDimitry Andric assert(Offset <= State.UF && "invalid offset to extract from"); 637*0fca6ea1SDimitry Andric // When loop is unrolled without vectorizing, retrieve UF - Offset. 638*0fca6ea1SDimitry Andric Res = State.get(getOperand(0), State.UF - Offset); 639*0fca6ea1SDimitry Andric } 640*0fca6ea1SDimitry Andric if (isa<ExtractElementInst>(Res)) 641*0fca6ea1SDimitry Andric Res->setName(Name); 642*0fca6ea1SDimitry Andric return Res; 643*0fca6ea1SDimitry Andric } 644*0fca6ea1SDimitry Andric case VPInstruction::LogicalAnd: { 645*0fca6ea1SDimitry Andric Value *A = State.get(getOperand(0), Part); 646*0fca6ea1SDimitry Andric Value *B = State.get(getOperand(1), Part); 647*0fca6ea1SDimitry Andric return Builder.CreateLogicalAnd(A, B, Name); 648*0fca6ea1SDimitry Andric } 649*0fca6ea1SDimitry Andric case VPInstruction::PtrAdd: { 650*0fca6ea1SDimitry Andric assert(vputils::onlyFirstLaneUsed(this) && 651*0fca6ea1SDimitry Andric "can only generate first lane for PtrAdd"); 652*0fca6ea1SDimitry Andric Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true); 653*0fca6ea1SDimitry Andric Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true); 654*0fca6ea1SDimitry Andric return Builder.CreatePtrAdd(Ptr, Addend, Name); 655*0fca6ea1SDimitry Andric } 656*0fca6ea1SDimitry Andric case VPInstruction::ResumePhi: { 657*0fca6ea1SDimitry Andric if (Part != 0) 658*0fca6ea1SDimitry Andric return State.get(this, 0, /*IsScalar*/ true); 659*0fca6ea1SDimitry Andric Value *IncomingFromVPlanPred = 660*0fca6ea1SDimitry Andric State.get(getOperand(0), Part, /* IsScalar */ true); 661*0fca6ea1SDimitry Andric Value *IncomingFromOtherPreds = 662*0fca6ea1SDimitry Andric State.get(getOperand(1), Part, /* IsScalar */ true); 663*0fca6ea1SDimitry Andric auto *NewPhi = 664*0fca6ea1SDimitry Andric Builder.CreatePHI(IncomingFromOtherPreds->getType(), 2, Name); 665*0fca6ea1SDimitry Andric BasicBlock *VPlanPred = 666*0fca6ea1SDimitry Andric State.CFG 667*0fca6ea1SDimitry Andric .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())]; 668*0fca6ea1SDimitry Andric NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred); 669*0fca6ea1SDimitry Andric for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) { 670*0fca6ea1SDimitry Andric assert(OtherPred != VPlanPred && 671*0fca6ea1SDimitry Andric "VPlan predecessors should not be connected yet"); 672*0fca6ea1SDimitry Andric NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred); 673*0fca6ea1SDimitry Andric } 674*0fca6ea1SDimitry Andric return NewPhi; 675*0fca6ea1SDimitry Andric } 676*0fca6ea1SDimitry Andric 67781ad6265SDimitry Andric default: 67881ad6265SDimitry Andric llvm_unreachable("Unsupported opcode for instruction"); 67981ad6265SDimitry Andric } 68081ad6265SDimitry Andric } 68181ad6265SDimitry Andric 682*0fca6ea1SDimitry Andric bool VPInstruction::isVectorToScalar() const { 683*0fca6ea1SDimitry Andric return getOpcode() == VPInstruction::ExtractFromEnd || 684*0fca6ea1SDimitry Andric getOpcode() == VPInstruction::ComputeReductionResult; 685*0fca6ea1SDimitry Andric } 686*0fca6ea1SDimitry Andric 687*0fca6ea1SDimitry Andric bool VPInstruction::isSingleScalar() const { 688*0fca6ea1SDimitry Andric return getOpcode() == VPInstruction::ResumePhi; 689*0fca6ea1SDimitry Andric } 690*0fca6ea1SDimitry Andric 6915f757f3fSDimitry Andric #if !defined(NDEBUG) 6925f757f3fSDimitry Andric bool VPInstruction::isFPMathOp() const { 6935f757f3fSDimitry Andric // Inspired by FPMathOperator::classof. Notable differences are that we don't 6945f757f3fSDimitry Andric // support Call, PHI and Select opcodes here yet. 6955f757f3fSDimitry Andric return Opcode == Instruction::FAdd || Opcode == Instruction::FMul || 6965f757f3fSDimitry Andric Opcode == Instruction::FNeg || Opcode == Instruction::FSub || 6975f757f3fSDimitry Andric Opcode == Instruction::FDiv || Opcode == Instruction::FRem || 6985f757f3fSDimitry Andric Opcode == Instruction::FCmp || Opcode == Instruction::Select; 6995f757f3fSDimitry Andric } 7005f757f3fSDimitry Andric #endif 7015f757f3fSDimitry Andric 70281ad6265SDimitry Andric void VPInstruction::execute(VPTransformState &State) { 70381ad6265SDimitry Andric assert(!State.Instance && "VPInstruction executing an Instance"); 70481ad6265SDimitry Andric IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); 7055f757f3fSDimitry Andric assert((hasFastMathFlags() == isFPMathOp() || 7065f757f3fSDimitry Andric getOpcode() == Instruction::Select) && 7075f757f3fSDimitry Andric "Recipe not a FPMathOp but has fast-math flags?"); 7085f757f3fSDimitry Andric if (hasFastMathFlags()) 7095f757f3fSDimitry Andric State.Builder.setFastMathFlags(getFastMathFlags()); 710*0fca6ea1SDimitry Andric State.setDebugLocFrom(getDebugLoc()); 711*0fca6ea1SDimitry Andric bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() && 712*0fca6ea1SDimitry Andric (vputils::onlyFirstLaneUsed(this) || 713*0fca6ea1SDimitry Andric isVectorToScalar() || isSingleScalar()); 714*0fca6ea1SDimitry Andric bool GeneratesPerAllLanes = doesGeneratePerAllLanes(); 715*0fca6ea1SDimitry Andric bool OnlyFirstPartUsed = vputils::onlyFirstPartUsed(this); 71606c3fb27SDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) { 717*0fca6ea1SDimitry Andric if (GeneratesPerAllLanes) { 718*0fca6ea1SDimitry Andric for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue(); 719*0fca6ea1SDimitry Andric Lane != NumLanes; ++Lane) { 720*0fca6ea1SDimitry Andric Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane)); 721*0fca6ea1SDimitry Andric assert(GeneratedValue && "generatePerLane must produce a value"); 722*0fca6ea1SDimitry Andric State.set(this, GeneratedValue, VPIteration(Part, Lane)); 723*0fca6ea1SDimitry Andric } 724*0fca6ea1SDimitry Andric continue; 725*0fca6ea1SDimitry Andric } 726*0fca6ea1SDimitry Andric 727*0fca6ea1SDimitry Andric if (Part != 0 && OnlyFirstPartUsed && hasResult()) { 728*0fca6ea1SDimitry Andric Value *Part0 = State.get(this, 0, /*IsScalar*/ GeneratesPerFirstLaneOnly); 729*0fca6ea1SDimitry Andric State.set(this, Part0, Part, 730*0fca6ea1SDimitry Andric /*IsScalar*/ GeneratesPerFirstLaneOnly); 731*0fca6ea1SDimitry Andric continue; 732*0fca6ea1SDimitry Andric } 733*0fca6ea1SDimitry Andric 734*0fca6ea1SDimitry Andric Value *GeneratedValue = generatePerPart(State, Part); 73506c3fb27SDimitry Andric if (!hasResult()) 73606c3fb27SDimitry Andric continue; 737*0fca6ea1SDimitry Andric assert(GeneratedValue && "generatePerPart must produce a value"); 738*0fca6ea1SDimitry Andric assert((GeneratedValue->getType()->isVectorTy() == 739*0fca6ea1SDimitry Andric !GeneratesPerFirstLaneOnly || 740*0fca6ea1SDimitry Andric State.VF.isScalar()) && 741*0fca6ea1SDimitry Andric "scalar value but not only first lane defined"); 742*0fca6ea1SDimitry Andric State.set(this, GeneratedValue, Part, 743*0fca6ea1SDimitry Andric /*IsScalar*/ GeneratesPerFirstLaneOnly); 74406c3fb27SDimitry Andric } 74581ad6265SDimitry Andric } 74681ad6265SDimitry Andric 747*0fca6ea1SDimitry Andric bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { 748*0fca6ea1SDimitry Andric assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); 749*0fca6ea1SDimitry Andric if (Instruction::isBinaryOp(getOpcode())) 750*0fca6ea1SDimitry Andric return vputils::onlyFirstLaneUsed(this); 751*0fca6ea1SDimitry Andric 752*0fca6ea1SDimitry Andric switch (getOpcode()) { 753*0fca6ea1SDimitry Andric default: 754*0fca6ea1SDimitry Andric return false; 755*0fca6ea1SDimitry Andric case Instruction::ICmp: 756*0fca6ea1SDimitry Andric case VPInstruction::PtrAdd: 757*0fca6ea1SDimitry Andric // TODO: Cover additional opcodes. 758*0fca6ea1SDimitry Andric return vputils::onlyFirstLaneUsed(this); 759*0fca6ea1SDimitry Andric case VPInstruction::ActiveLaneMask: 760*0fca6ea1SDimitry Andric case VPInstruction::ExplicitVectorLength: 761*0fca6ea1SDimitry Andric case VPInstruction::CalculateTripCountMinusVF: 762*0fca6ea1SDimitry Andric case VPInstruction::CanonicalIVIncrementForPart: 763*0fca6ea1SDimitry Andric case VPInstruction::BranchOnCount: 764*0fca6ea1SDimitry Andric case VPInstruction::BranchOnCond: 765*0fca6ea1SDimitry Andric case VPInstruction::ResumePhi: 766*0fca6ea1SDimitry Andric return true; 767*0fca6ea1SDimitry Andric }; 768*0fca6ea1SDimitry Andric llvm_unreachable("switch should return"); 769*0fca6ea1SDimitry Andric } 770*0fca6ea1SDimitry Andric 771*0fca6ea1SDimitry Andric bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const { 772*0fca6ea1SDimitry Andric assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); 773*0fca6ea1SDimitry Andric if (Instruction::isBinaryOp(getOpcode())) 774*0fca6ea1SDimitry Andric return vputils::onlyFirstPartUsed(this); 775*0fca6ea1SDimitry Andric 776*0fca6ea1SDimitry Andric switch (getOpcode()) { 777*0fca6ea1SDimitry Andric default: 778*0fca6ea1SDimitry Andric return false; 779*0fca6ea1SDimitry Andric case Instruction::ICmp: 780*0fca6ea1SDimitry Andric case Instruction::Select: 781*0fca6ea1SDimitry Andric return vputils::onlyFirstPartUsed(this); 782*0fca6ea1SDimitry Andric case VPInstruction::BranchOnCount: 783*0fca6ea1SDimitry Andric case VPInstruction::BranchOnCond: 784*0fca6ea1SDimitry Andric case VPInstruction::CanonicalIVIncrementForPart: 785*0fca6ea1SDimitry Andric return true; 786*0fca6ea1SDimitry Andric }; 787*0fca6ea1SDimitry Andric llvm_unreachable("switch should return"); 788*0fca6ea1SDimitry Andric } 789*0fca6ea1SDimitry Andric 79081ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 79181ad6265SDimitry Andric void VPInstruction::dump() const { 79281ad6265SDimitry Andric VPSlotTracker SlotTracker(getParent()->getPlan()); 79381ad6265SDimitry Andric print(dbgs(), "", SlotTracker); 79481ad6265SDimitry Andric } 79581ad6265SDimitry Andric 79681ad6265SDimitry Andric void VPInstruction::print(raw_ostream &O, const Twine &Indent, 79781ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 79881ad6265SDimitry Andric O << Indent << "EMIT "; 79981ad6265SDimitry Andric 80081ad6265SDimitry Andric if (hasResult()) { 80181ad6265SDimitry Andric printAsOperand(O, SlotTracker); 80281ad6265SDimitry Andric O << " = "; 80381ad6265SDimitry Andric } 80481ad6265SDimitry Andric 80581ad6265SDimitry Andric switch (getOpcode()) { 80681ad6265SDimitry Andric case VPInstruction::Not: 80781ad6265SDimitry Andric O << "not"; 80881ad6265SDimitry Andric break; 80981ad6265SDimitry Andric case VPInstruction::SLPLoad: 81081ad6265SDimitry Andric O << "combined load"; 81181ad6265SDimitry Andric break; 81281ad6265SDimitry Andric case VPInstruction::SLPStore: 81381ad6265SDimitry Andric O << "combined store"; 81481ad6265SDimitry Andric break; 81581ad6265SDimitry Andric case VPInstruction::ActiveLaneMask: 81681ad6265SDimitry Andric O << "active lane mask"; 81781ad6265SDimitry Andric break; 818*0fca6ea1SDimitry Andric case VPInstruction::ResumePhi: 819*0fca6ea1SDimitry Andric O << "resume-phi"; 820*0fca6ea1SDimitry Andric break; 821*0fca6ea1SDimitry Andric case VPInstruction::ExplicitVectorLength: 822*0fca6ea1SDimitry Andric O << "EXPLICIT-VECTOR-LENGTH"; 823*0fca6ea1SDimitry Andric break; 82481ad6265SDimitry Andric case VPInstruction::FirstOrderRecurrenceSplice: 82581ad6265SDimitry Andric O << "first-order splice"; 82681ad6265SDimitry Andric break; 82781ad6265SDimitry Andric case VPInstruction::BranchOnCond: 82881ad6265SDimitry Andric O << "branch-on-cond"; 82981ad6265SDimitry Andric break; 83006c3fb27SDimitry Andric case VPInstruction::CalculateTripCountMinusVF: 83106c3fb27SDimitry Andric O << "TC > VF ? TC - VF : 0"; 83206c3fb27SDimitry Andric break; 833753f127fSDimitry Andric case VPInstruction::CanonicalIVIncrementForPart: 834753f127fSDimitry Andric O << "VF * Part +"; 835753f127fSDimitry Andric break; 83681ad6265SDimitry Andric case VPInstruction::BranchOnCount: 83781ad6265SDimitry Andric O << "branch-on-count"; 83881ad6265SDimitry Andric break; 839*0fca6ea1SDimitry Andric case VPInstruction::ExtractFromEnd: 840*0fca6ea1SDimitry Andric O << "extract-from-end"; 841*0fca6ea1SDimitry Andric break; 8421db9f3b2SDimitry Andric case VPInstruction::ComputeReductionResult: 8431db9f3b2SDimitry Andric O << "compute-reduction-result"; 8441db9f3b2SDimitry Andric break; 845*0fca6ea1SDimitry Andric case VPInstruction::LogicalAnd: 846*0fca6ea1SDimitry Andric O << "logical-and"; 847*0fca6ea1SDimitry Andric break; 848*0fca6ea1SDimitry Andric case VPInstruction::PtrAdd: 849*0fca6ea1SDimitry Andric O << "ptradd"; 850*0fca6ea1SDimitry Andric break; 85181ad6265SDimitry Andric default: 85281ad6265SDimitry Andric O << Instruction::getOpcodeName(getOpcode()); 85381ad6265SDimitry Andric } 85481ad6265SDimitry Andric 8555f757f3fSDimitry Andric printFlags(O); 8565f757f3fSDimitry Andric printOperands(O, SlotTracker); 85781ad6265SDimitry Andric 8585f757f3fSDimitry Andric if (auto DL = getDebugLoc()) { 85981ad6265SDimitry Andric O << ", !dbg "; 86081ad6265SDimitry Andric DL.print(O); 86181ad6265SDimitry Andric } 86281ad6265SDimitry Andric } 86381ad6265SDimitry Andric #endif 86481ad6265SDimitry Andric 865bdd1243dSDimitry Andric void VPWidenCallRecipe::execute(VPTransformState &State) { 86606c3fb27SDimitry Andric assert(State.VF.isVector() && "not widening"); 867*0fca6ea1SDimitry Andric Function *CalledScalarFn = getCalledScalarFunction(); 868*0fca6ea1SDimitry Andric assert(!isDbgInfoIntrinsic(CalledScalarFn->getIntrinsicID()) && 869bdd1243dSDimitry Andric "DbgInfoIntrinsic should have been dropped during VPlan construction"); 8707a6dacacSDimitry Andric State.setDebugLocFrom(getDebugLoc()); 871bdd1243dSDimitry Andric 872647cbc5dSDimitry Andric bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic; 8735f757f3fSDimitry Andric FunctionType *VFTy = nullptr; 8745f757f3fSDimitry Andric if (Variant) 8755f757f3fSDimitry Andric VFTy = Variant->getFunctionType(); 876bdd1243dSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) { 87706c3fb27SDimitry Andric SmallVector<Type *, 2> TysForDecl; 87806c3fb27SDimitry Andric // Add return type if intrinsic is overloaded on it. 879647cbc5dSDimitry Andric if (UseIntrinsic && 880647cbc5dSDimitry Andric isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) 881*0fca6ea1SDimitry Andric TysForDecl.push_back(VectorType::get( 882*0fca6ea1SDimitry Andric CalledScalarFn->getReturnType()->getScalarType(), State.VF)); 883bdd1243dSDimitry Andric SmallVector<Value *, 4> Args; 884*0fca6ea1SDimitry Andric for (const auto &I : enumerate(arg_operands())) { 885bdd1243dSDimitry Andric // Some intrinsics have a scalar argument - don't replace it with a 886bdd1243dSDimitry Andric // vector. 887bdd1243dSDimitry Andric Value *Arg; 888b3edf446SDimitry Andric if (UseIntrinsic && 889b3edf446SDimitry Andric isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index())) 890bdd1243dSDimitry Andric Arg = State.get(I.value(), VPIteration(0, 0)); 891b3edf446SDimitry Andric // Some vectorized function variants may also take a scalar argument, 892b3edf446SDimitry Andric // e.g. linear parameters for pointers. This needs to be the scalar value 893b3edf446SDimitry Andric // from the start of the respective part when interleaving. 894b3edf446SDimitry Andric else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy()) 895b3edf446SDimitry Andric Arg = State.get(I.value(), VPIteration(Part, 0)); 8965f757f3fSDimitry Andric else 8975f757f3fSDimitry Andric Arg = State.get(I.value(), Part); 898647cbc5dSDimitry Andric if (UseIntrinsic && 899647cbc5dSDimitry Andric isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index())) 900bdd1243dSDimitry Andric TysForDecl.push_back(Arg->getType()); 901bdd1243dSDimitry Andric Args.push_back(Arg); 902bdd1243dSDimitry Andric } 903bdd1243dSDimitry Andric 904bdd1243dSDimitry Andric Function *VectorF; 905647cbc5dSDimitry Andric if (UseIntrinsic) { 906bdd1243dSDimitry Andric // Use vector version of the intrinsic. 907bdd1243dSDimitry Andric Module *M = State.Builder.GetInsertBlock()->getModule(); 908bdd1243dSDimitry Andric VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl); 909bdd1243dSDimitry Andric assert(VectorF && "Can't retrieve vector intrinsic."); 910bdd1243dSDimitry Andric } else { 911bdd1243dSDimitry Andric #ifndef NDEBUG 91206c3fb27SDimitry Andric assert(Variant != nullptr && "Can't create vector function."); 913bdd1243dSDimitry Andric #endif 91406c3fb27SDimitry Andric VectorF = Variant; 915bdd1243dSDimitry Andric } 91606c3fb27SDimitry Andric 917*0fca6ea1SDimitry Andric auto *CI = cast_or_null<CallInst>(getUnderlyingInstr()); 918bdd1243dSDimitry Andric SmallVector<OperandBundleDef, 1> OpBundles; 919*0fca6ea1SDimitry Andric if (CI) 920*0fca6ea1SDimitry Andric CI->getOperandBundlesAsDefs(OpBundles); 921*0fca6ea1SDimitry Andric 922bdd1243dSDimitry Andric CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles); 923bdd1243dSDimitry Andric 924bdd1243dSDimitry Andric if (isa<FPMathOperator>(V)) 925*0fca6ea1SDimitry Andric V->copyFastMathFlags(CI); 926bdd1243dSDimitry Andric 927*0fca6ea1SDimitry Andric if (!V->getType()->isVoidTy()) 928bdd1243dSDimitry Andric State.set(this, V, Part); 929*0fca6ea1SDimitry Andric State.addMetadata(V, CI); 930bdd1243dSDimitry Andric } 931bdd1243dSDimitry Andric } 932bdd1243dSDimitry Andric 93381ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 93481ad6265SDimitry Andric void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent, 93581ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 93681ad6265SDimitry Andric O << Indent << "WIDEN-CALL "; 93781ad6265SDimitry Andric 938*0fca6ea1SDimitry Andric Function *CalledFn = getCalledScalarFunction(); 939*0fca6ea1SDimitry Andric if (CalledFn->getReturnType()->isVoidTy()) 94081ad6265SDimitry Andric O << "void "; 94181ad6265SDimitry Andric else { 94281ad6265SDimitry Andric printAsOperand(O, SlotTracker); 94381ad6265SDimitry Andric O << " = "; 94481ad6265SDimitry Andric } 94581ad6265SDimitry Andric 946*0fca6ea1SDimitry Andric O << "call @" << CalledFn->getName() << "("; 947*0fca6ea1SDimitry Andric interleaveComma(arg_operands(), O, [&O, &SlotTracker](VPValue *Op) { 948*0fca6ea1SDimitry Andric Op->printAsOperand(O, SlotTracker); 949*0fca6ea1SDimitry Andric }); 95081ad6265SDimitry Andric O << ")"; 951bdd1243dSDimitry Andric 952bdd1243dSDimitry Andric if (VectorIntrinsicID) 953bdd1243dSDimitry Andric O << " (using vector intrinsic)"; 95406c3fb27SDimitry Andric else { 95506c3fb27SDimitry Andric O << " (using library function"; 95606c3fb27SDimitry Andric if (Variant->hasName()) 95706c3fb27SDimitry Andric O << ": " << Variant->getName(); 95806c3fb27SDimitry Andric O << ")"; 95906c3fb27SDimitry Andric } 96081ad6265SDimitry Andric } 96181ad6265SDimitry Andric 96281ad6265SDimitry Andric void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, 96381ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 96481ad6265SDimitry Andric O << Indent << "WIDEN-SELECT "; 96581ad6265SDimitry Andric printAsOperand(O, SlotTracker); 96681ad6265SDimitry Andric O << " = select "; 96781ad6265SDimitry Andric getOperand(0)->printAsOperand(O, SlotTracker); 96881ad6265SDimitry Andric O << ", "; 96981ad6265SDimitry Andric getOperand(1)->printAsOperand(O, SlotTracker); 97081ad6265SDimitry Andric O << ", "; 97181ad6265SDimitry Andric getOperand(2)->printAsOperand(O, SlotTracker); 97206c3fb27SDimitry Andric O << (isInvariantCond() ? " (condition is loop invariant)" : ""); 97381ad6265SDimitry Andric } 974753f127fSDimitry Andric #endif 97581ad6265SDimitry Andric 976753f127fSDimitry Andric void VPWidenSelectRecipe::execute(VPTransformState &State) { 9775f757f3fSDimitry Andric State.setDebugLocFrom(getDebugLoc()); 978753f127fSDimitry Andric 979753f127fSDimitry Andric // The condition can be loop invariant but still defined inside the 980753f127fSDimitry Andric // loop. This means that we can't just use the original 'cond' value. 981753f127fSDimitry Andric // We have to take the 'vectorized' value and pick the first lane. 982753f127fSDimitry Andric // Instcombine will make this a no-op. 983753f127fSDimitry Andric auto *InvarCond = 98406c3fb27SDimitry Andric isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr; 985753f127fSDimitry Andric 986753f127fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) { 98706c3fb27SDimitry Andric Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part); 988753f127fSDimitry Andric Value *Op0 = State.get(getOperand(1), Part); 989753f127fSDimitry Andric Value *Op1 = State.get(getOperand(2), Part); 990753f127fSDimitry Andric Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1); 991753f127fSDimitry Andric State.set(this, Sel, Part); 9925f757f3fSDimitry Andric State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue())); 993753f127fSDimitry Andric } 994753f127fSDimitry Andric } 995753f127fSDimitry Andric 9965f757f3fSDimitry Andric VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy( 9975f757f3fSDimitry Andric const FastMathFlags &FMF) { 9985f757f3fSDimitry Andric AllowReassoc = FMF.allowReassoc(); 9995f757f3fSDimitry Andric NoNaNs = FMF.noNaNs(); 10005f757f3fSDimitry Andric NoInfs = FMF.noInfs(); 10015f757f3fSDimitry Andric NoSignedZeros = FMF.noSignedZeros(); 10025f757f3fSDimitry Andric AllowReciprocal = FMF.allowReciprocal(); 10035f757f3fSDimitry Andric AllowContract = FMF.allowContract(); 10045f757f3fSDimitry Andric ApproxFunc = FMF.approxFunc(); 10055f757f3fSDimitry Andric } 10065f757f3fSDimitry Andric 100706c3fb27SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 100806c3fb27SDimitry Andric void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const { 100906c3fb27SDimitry Andric switch (OpType) { 10105f757f3fSDimitry Andric case OperationType::Cmp: 10115f757f3fSDimitry Andric O << " " << CmpInst::getPredicateName(getPredicate()); 10125f757f3fSDimitry Andric break; 10135f757f3fSDimitry Andric case OperationType::DisjointOp: 10145f757f3fSDimitry Andric if (DisjointFlags.IsDisjoint) 10155f757f3fSDimitry Andric O << " disjoint"; 10165f757f3fSDimitry Andric break; 101706c3fb27SDimitry Andric case OperationType::PossiblyExactOp: 101806c3fb27SDimitry Andric if (ExactFlags.IsExact) 101906c3fb27SDimitry Andric O << " exact"; 102006c3fb27SDimitry Andric break; 102106c3fb27SDimitry Andric case OperationType::OverflowingBinOp: 102206c3fb27SDimitry Andric if (WrapFlags.HasNUW) 102306c3fb27SDimitry Andric O << " nuw"; 102406c3fb27SDimitry Andric if (WrapFlags.HasNSW) 102506c3fb27SDimitry Andric O << " nsw"; 102606c3fb27SDimitry Andric break; 102706c3fb27SDimitry Andric case OperationType::FPMathOp: 102806c3fb27SDimitry Andric getFastMathFlags().print(O); 102906c3fb27SDimitry Andric break; 103006c3fb27SDimitry Andric case OperationType::GEPOp: 103106c3fb27SDimitry Andric if (GEPFlags.IsInBounds) 103206c3fb27SDimitry Andric O << " inbounds"; 103306c3fb27SDimitry Andric break; 10345f757f3fSDimitry Andric case OperationType::NonNegOp: 10355f757f3fSDimitry Andric if (NonNegFlags.NonNeg) 10365f757f3fSDimitry Andric O << " nneg"; 10375f757f3fSDimitry Andric break; 103806c3fb27SDimitry Andric case OperationType::Other: 103906c3fb27SDimitry Andric break; 104006c3fb27SDimitry Andric } 10415f757f3fSDimitry Andric if (getNumOperands() > 0) 104206c3fb27SDimitry Andric O << " "; 104306c3fb27SDimitry Andric } 104406c3fb27SDimitry Andric #endif 104506c3fb27SDimitry Andric 1046753f127fSDimitry Andric void VPWidenRecipe::execute(VPTransformState &State) { 10475f757f3fSDimitry Andric State.setDebugLocFrom(getDebugLoc()); 1048753f127fSDimitry Andric auto &Builder = State.Builder; 10495f757f3fSDimitry Andric switch (Opcode) { 1050753f127fSDimitry Andric case Instruction::Call: 1051753f127fSDimitry Andric case Instruction::Br: 1052753f127fSDimitry Andric case Instruction::PHI: 1053753f127fSDimitry Andric case Instruction::GetElementPtr: 1054753f127fSDimitry Andric case Instruction::Select: 1055753f127fSDimitry Andric llvm_unreachable("This instruction is handled by a different recipe."); 1056753f127fSDimitry Andric case Instruction::UDiv: 1057753f127fSDimitry Andric case Instruction::SDiv: 1058753f127fSDimitry Andric case Instruction::SRem: 1059753f127fSDimitry Andric case Instruction::URem: 1060753f127fSDimitry Andric case Instruction::Add: 1061753f127fSDimitry Andric case Instruction::FAdd: 1062753f127fSDimitry Andric case Instruction::Sub: 1063753f127fSDimitry Andric case Instruction::FSub: 1064753f127fSDimitry Andric case Instruction::FNeg: 1065753f127fSDimitry Andric case Instruction::Mul: 1066753f127fSDimitry Andric case Instruction::FMul: 1067753f127fSDimitry Andric case Instruction::FDiv: 1068753f127fSDimitry Andric case Instruction::FRem: 1069753f127fSDimitry Andric case Instruction::Shl: 1070753f127fSDimitry Andric case Instruction::LShr: 1071753f127fSDimitry Andric case Instruction::AShr: 1072753f127fSDimitry Andric case Instruction::And: 1073753f127fSDimitry Andric case Instruction::Or: 1074753f127fSDimitry Andric case Instruction::Xor: { 1075753f127fSDimitry Andric // Just widen unops and binops. 1076753f127fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) { 1077753f127fSDimitry Andric SmallVector<Value *, 2> Ops; 1078753f127fSDimitry Andric for (VPValue *VPOp : operands()) 1079753f127fSDimitry Andric Ops.push_back(State.get(VPOp, Part)); 1080753f127fSDimitry Andric 10815f757f3fSDimitry Andric Value *V = Builder.CreateNAryOp(Opcode, Ops); 1082753f127fSDimitry Andric 108306c3fb27SDimitry Andric if (auto *VecOp = dyn_cast<Instruction>(V)) 108406c3fb27SDimitry Andric setFlags(VecOp); 1085753f127fSDimitry Andric 1086753f127fSDimitry Andric // Use this vector value for all users of the original instruction. 1087753f127fSDimitry Andric State.set(this, V, Part); 10885f757f3fSDimitry Andric State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue())); 1089753f127fSDimitry Andric } 1090753f127fSDimitry Andric 1091753f127fSDimitry Andric break; 1092753f127fSDimitry Andric } 1093753f127fSDimitry Andric case Instruction::Freeze: { 1094753f127fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) { 1095753f127fSDimitry Andric Value *Op = State.get(getOperand(0), Part); 1096753f127fSDimitry Andric 1097753f127fSDimitry Andric Value *Freeze = Builder.CreateFreeze(Op); 1098753f127fSDimitry Andric State.set(this, Freeze, Part); 1099753f127fSDimitry Andric } 1100753f127fSDimitry Andric break; 1101753f127fSDimitry Andric } 1102753f127fSDimitry Andric case Instruction::ICmp: 1103753f127fSDimitry Andric case Instruction::FCmp: { 1104753f127fSDimitry Andric // Widen compares. Generate vector compares. 11055f757f3fSDimitry Andric bool FCmp = Opcode == Instruction::FCmp; 1106753f127fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) { 1107753f127fSDimitry Andric Value *A = State.get(getOperand(0), Part); 1108753f127fSDimitry Andric Value *B = State.get(getOperand(1), Part); 1109753f127fSDimitry Andric Value *C = nullptr; 1110753f127fSDimitry Andric if (FCmp) { 1111753f127fSDimitry Andric // Propagate fast math flags. 1112753f127fSDimitry Andric IRBuilder<>::FastMathFlagGuard FMFG(Builder); 11135f757f3fSDimitry Andric if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue())) 11145f757f3fSDimitry Andric Builder.setFastMathFlags(I->getFastMathFlags()); 11155f757f3fSDimitry Andric C = Builder.CreateFCmp(getPredicate(), A, B); 1116753f127fSDimitry Andric } else { 11175f757f3fSDimitry Andric C = Builder.CreateICmp(getPredicate(), A, B); 1118753f127fSDimitry Andric } 1119753f127fSDimitry Andric State.set(this, C, Part); 11205f757f3fSDimitry Andric State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue())); 1121753f127fSDimitry Andric } 1122753f127fSDimitry Andric 1123753f127fSDimitry Andric break; 1124753f127fSDimitry Andric } 1125753f127fSDimitry Andric default: 1126753f127fSDimitry Andric // This instruction is not vectorized by simple widening. 11275f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : " 11285f757f3fSDimitry Andric << Instruction::getOpcodeName(Opcode)); 1129753f127fSDimitry Andric llvm_unreachable("Unhandled instruction!"); 1130753f127fSDimitry Andric } // end of switch. 11315f757f3fSDimitry Andric 11325f757f3fSDimitry Andric #if !defined(NDEBUG) 11335f757f3fSDimitry Andric // Verify that VPlan type inference results agree with the type of the 11345f757f3fSDimitry Andric // generated values. 11355f757f3fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) { 11365f757f3fSDimitry Andric assert(VectorType::get(State.TypeAnalysis.inferScalarType(this), 11375f757f3fSDimitry Andric State.VF) == State.get(this, Part)->getType() && 11385f757f3fSDimitry Andric "inferred type and type from generated instructions do not match"); 1139753f127fSDimitry Andric } 11405f757f3fSDimitry Andric #endif 11415f757f3fSDimitry Andric } 11425f757f3fSDimitry Andric 1143753f127fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 114481ad6265SDimitry Andric void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, 114581ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 114681ad6265SDimitry Andric O << Indent << "WIDEN "; 114781ad6265SDimitry Andric printAsOperand(O, SlotTracker); 11485f757f3fSDimitry Andric O << " = " << Instruction::getOpcodeName(Opcode); 114906c3fb27SDimitry Andric printFlags(O); 115081ad6265SDimitry Andric printOperands(O, SlotTracker); 115181ad6265SDimitry Andric } 115206c3fb27SDimitry Andric #endif 115306c3fb27SDimitry Andric 115406c3fb27SDimitry Andric void VPWidenCastRecipe::execute(VPTransformState &State) { 11555f757f3fSDimitry Andric State.setDebugLocFrom(getDebugLoc()); 115606c3fb27SDimitry Andric auto &Builder = State.Builder; 115706c3fb27SDimitry Andric /// Vectorize casts. 115806c3fb27SDimitry Andric assert(State.VF.isVector() && "Not vectorizing?"); 115906c3fb27SDimitry Andric Type *DestTy = VectorType::get(getResultType(), State.VF); 11605f757f3fSDimitry Andric VPValue *Op = getOperand(0); 116106c3fb27SDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) { 11625f757f3fSDimitry Andric if (Part > 0 && Op->isLiveIn()) { 11635f757f3fSDimitry Andric // FIXME: Remove once explicit unrolling is implemented using VPlan. 11645f757f3fSDimitry Andric State.set(this, State.get(this, 0), Part); 11655f757f3fSDimitry Andric continue; 11665f757f3fSDimitry Andric } 11675f757f3fSDimitry Andric Value *A = State.get(Op, Part); 116806c3fb27SDimitry Andric Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy); 116906c3fb27SDimitry Andric State.set(this, Cast, Part); 11705f757f3fSDimitry Andric State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue())); 117106c3fb27SDimitry Andric } 117206c3fb27SDimitry Andric } 117306c3fb27SDimitry Andric 117406c3fb27SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 117506c3fb27SDimitry Andric void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent, 117606c3fb27SDimitry Andric VPSlotTracker &SlotTracker) const { 117706c3fb27SDimitry Andric O << Indent << "WIDEN-CAST "; 117806c3fb27SDimitry Andric printAsOperand(O, SlotTracker); 117906c3fb27SDimitry Andric O << " = " << Instruction::getOpcodeName(Opcode) << " "; 11805f757f3fSDimitry Andric printFlags(O); 118106c3fb27SDimitry Andric printOperands(O, SlotTracker); 118206c3fb27SDimitry Andric O << " to " << *getResultType(); 118306c3fb27SDimitry Andric } 11845f757f3fSDimitry Andric #endif 118581ad6265SDimitry Andric 11865f757f3fSDimitry Andric /// This function adds 11875f757f3fSDimitry Andric /// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...) 11885f757f3fSDimitry Andric /// to each vector element of Val. The sequence starts at StartIndex. 11895f757f3fSDimitry Andric /// \p Opcode is relevant for FP induction variable. 11905f757f3fSDimitry Andric static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step, 11915f757f3fSDimitry Andric Instruction::BinaryOps BinOp, ElementCount VF, 11925f757f3fSDimitry Andric IRBuilderBase &Builder) { 11935f757f3fSDimitry Andric assert(VF.isVector() && "only vector VFs are supported"); 11945f757f3fSDimitry Andric 11955f757f3fSDimitry Andric // Create and check the types. 11965f757f3fSDimitry Andric auto *ValVTy = cast<VectorType>(Val->getType()); 11975f757f3fSDimitry Andric ElementCount VLen = ValVTy->getElementCount(); 11985f757f3fSDimitry Andric 11995f757f3fSDimitry Andric Type *STy = Val->getType()->getScalarType(); 12005f757f3fSDimitry Andric assert((STy->isIntegerTy() || STy->isFloatingPointTy()) && 12015f757f3fSDimitry Andric "Induction Step must be an integer or FP"); 12025f757f3fSDimitry Andric assert(Step->getType() == STy && "Step has wrong type"); 12035f757f3fSDimitry Andric 12045f757f3fSDimitry Andric SmallVector<Constant *, 8> Indices; 12055f757f3fSDimitry Andric 12065f757f3fSDimitry Andric // Create a vector of consecutive numbers from zero to VF. 12075f757f3fSDimitry Andric VectorType *InitVecValVTy = ValVTy; 12085f757f3fSDimitry Andric if (STy->isFloatingPointTy()) { 12095f757f3fSDimitry Andric Type *InitVecValSTy = 12105f757f3fSDimitry Andric IntegerType::get(STy->getContext(), STy->getScalarSizeInBits()); 12115f757f3fSDimitry Andric InitVecValVTy = VectorType::get(InitVecValSTy, VLen); 12125f757f3fSDimitry Andric } 12135f757f3fSDimitry Andric Value *InitVec = Builder.CreateStepVector(InitVecValVTy); 12145f757f3fSDimitry Andric 12155f757f3fSDimitry Andric // Splat the StartIdx 12165f757f3fSDimitry Andric Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx); 12175f757f3fSDimitry Andric 12185f757f3fSDimitry Andric if (STy->isIntegerTy()) { 12195f757f3fSDimitry Andric InitVec = Builder.CreateAdd(InitVec, StartIdxSplat); 12205f757f3fSDimitry Andric Step = Builder.CreateVectorSplat(VLen, Step); 12215f757f3fSDimitry Andric assert(Step->getType() == Val->getType() && "Invalid step vec"); 12225f757f3fSDimitry Andric // FIXME: The newly created binary instructions should contain nsw/nuw 12235f757f3fSDimitry Andric // flags, which can be found from the original scalar operations. 12245f757f3fSDimitry Andric Step = Builder.CreateMul(InitVec, Step); 12255f757f3fSDimitry Andric return Builder.CreateAdd(Val, Step, "induction"); 12265f757f3fSDimitry Andric } 12275f757f3fSDimitry Andric 12285f757f3fSDimitry Andric // Floating point induction. 12295f757f3fSDimitry Andric assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) && 12305f757f3fSDimitry Andric "Binary Opcode should be specified for FP induction"); 12315f757f3fSDimitry Andric InitVec = Builder.CreateUIToFP(InitVec, ValVTy); 12325f757f3fSDimitry Andric InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat); 12335f757f3fSDimitry Andric 12345f757f3fSDimitry Andric Step = Builder.CreateVectorSplat(VLen, Step); 12355f757f3fSDimitry Andric Value *MulOp = Builder.CreateFMul(InitVec, Step); 12365f757f3fSDimitry Andric return Builder.CreateBinOp(BinOp, Val, MulOp, "induction"); 12375f757f3fSDimitry Andric } 12385f757f3fSDimitry Andric 12395f757f3fSDimitry Andric /// A helper function that returns an integer or floating-point constant with 12405f757f3fSDimitry Andric /// value C. 12415f757f3fSDimitry Andric static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) { 12425f757f3fSDimitry Andric return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C) 12435f757f3fSDimitry Andric : ConstantFP::get(Ty, C); 12445f757f3fSDimitry Andric } 12455f757f3fSDimitry Andric 12465f757f3fSDimitry Andric static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy, 12475f757f3fSDimitry Andric ElementCount VF) { 12485f757f3fSDimitry Andric assert(FTy->isFloatingPointTy() && "Expected floating point type!"); 12495f757f3fSDimitry Andric Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits()); 12505f757f3fSDimitry Andric Value *RuntimeVF = getRuntimeVF(B, IntTy, VF); 12515f757f3fSDimitry Andric return B.CreateUIToFP(RuntimeVF, FTy); 12525f757f3fSDimitry Andric } 12535f757f3fSDimitry Andric 12545f757f3fSDimitry Andric void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { 12555f757f3fSDimitry Andric assert(!State.Instance && "Int or FP induction being replicated."); 12565f757f3fSDimitry Andric 12575f757f3fSDimitry Andric Value *Start = getStartValue()->getLiveInIRValue(); 12585f757f3fSDimitry Andric const InductionDescriptor &ID = getInductionDescriptor(); 12595f757f3fSDimitry Andric TruncInst *Trunc = getTruncInst(); 12605f757f3fSDimitry Andric IRBuilderBase &Builder = State.Builder; 12615f757f3fSDimitry Andric assert(IV->getType() == ID.getStartValue()->getType() && "Types must match"); 12625f757f3fSDimitry Andric assert(State.VF.isVector() && "must have vector VF"); 12635f757f3fSDimitry Andric 12645f757f3fSDimitry Andric // The value from the original loop to which we are mapping the new induction 12655f757f3fSDimitry Andric // variable. 12665f757f3fSDimitry Andric Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV; 12675f757f3fSDimitry Andric 12685f757f3fSDimitry Andric // Fast-math-flags propagate from the original induction instruction. 12695f757f3fSDimitry Andric IRBuilder<>::FastMathFlagGuard FMFG(Builder); 12705f757f3fSDimitry Andric if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp())) 12715f757f3fSDimitry Andric Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags()); 12725f757f3fSDimitry Andric 12735f757f3fSDimitry Andric // Now do the actual transformations, and start with fetching the step value. 12745f757f3fSDimitry Andric Value *Step = State.get(getStepValue(), VPIteration(0, 0)); 12755f757f3fSDimitry Andric 12765f757f3fSDimitry Andric assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) && 12775f757f3fSDimitry Andric "Expected either an induction phi-node or a truncate of it!"); 12785f757f3fSDimitry Andric 12795f757f3fSDimitry Andric // Construct the initial value of the vector IV in the vector loop preheader 12805f757f3fSDimitry Andric auto CurrIP = Builder.saveIP(); 12815f757f3fSDimitry Andric BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 12825f757f3fSDimitry Andric Builder.SetInsertPoint(VectorPH->getTerminator()); 12835f757f3fSDimitry Andric if (isa<TruncInst>(EntryVal)) { 12845f757f3fSDimitry Andric assert(Start->getType()->isIntegerTy() && 12855f757f3fSDimitry Andric "Truncation requires an integer type"); 12865f757f3fSDimitry Andric auto *TruncType = cast<IntegerType>(EntryVal->getType()); 12875f757f3fSDimitry Andric Step = Builder.CreateTrunc(Step, TruncType); 12885f757f3fSDimitry Andric Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType); 12895f757f3fSDimitry Andric } 12905f757f3fSDimitry Andric 12915f757f3fSDimitry Andric Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0); 12925f757f3fSDimitry Andric Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start); 12935f757f3fSDimitry Andric Value *SteppedStart = getStepVector( 12945f757f3fSDimitry Andric SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder); 12955f757f3fSDimitry Andric 12965f757f3fSDimitry Andric // We create vector phi nodes for both integer and floating-point induction 12975f757f3fSDimitry Andric // variables. Here, we determine the kind of arithmetic we will perform. 12985f757f3fSDimitry Andric Instruction::BinaryOps AddOp; 12995f757f3fSDimitry Andric Instruction::BinaryOps MulOp; 13005f757f3fSDimitry Andric if (Step->getType()->isIntegerTy()) { 13015f757f3fSDimitry Andric AddOp = Instruction::Add; 13025f757f3fSDimitry Andric MulOp = Instruction::Mul; 13035f757f3fSDimitry Andric } else { 13045f757f3fSDimitry Andric AddOp = ID.getInductionOpcode(); 13055f757f3fSDimitry Andric MulOp = Instruction::FMul; 13065f757f3fSDimitry Andric } 13075f757f3fSDimitry Andric 13085f757f3fSDimitry Andric // Multiply the vectorization factor by the step using integer or 13095f757f3fSDimitry Andric // floating-point arithmetic as appropriate. 13105f757f3fSDimitry Andric Type *StepType = Step->getType(); 13115f757f3fSDimitry Andric Value *RuntimeVF; 13125f757f3fSDimitry Andric if (Step->getType()->isFloatingPointTy()) 13135f757f3fSDimitry Andric RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF); 13145f757f3fSDimitry Andric else 13155f757f3fSDimitry Andric RuntimeVF = getRuntimeVF(Builder, StepType, State.VF); 13165f757f3fSDimitry Andric Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF); 13175f757f3fSDimitry Andric 13185f757f3fSDimitry Andric // Create a vector splat to use in the induction update. 13195f757f3fSDimitry Andric // 13205f757f3fSDimitry Andric // FIXME: If the step is non-constant, we create the vector splat with 13215f757f3fSDimitry Andric // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't 13225f757f3fSDimitry Andric // handle a constant vector splat. 13235f757f3fSDimitry Andric Value *SplatVF = isa<Constant>(Mul) 13245f757f3fSDimitry Andric ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul)) 13255f757f3fSDimitry Andric : Builder.CreateVectorSplat(State.VF, Mul); 13265f757f3fSDimitry Andric Builder.restoreIP(CurrIP); 13275f757f3fSDimitry Andric 13285f757f3fSDimitry Andric // We may need to add the step a number of times, depending on the unroll 13295f757f3fSDimitry Andric // factor. The last of those goes into the PHI. 13305f757f3fSDimitry Andric PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind"); 13315f757f3fSDimitry Andric VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt()); 13325f757f3fSDimitry Andric VecInd->setDebugLoc(EntryVal->getDebugLoc()); 13335f757f3fSDimitry Andric Instruction *LastInduction = VecInd; 13345f757f3fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) { 13355f757f3fSDimitry Andric State.set(this, LastInduction, Part); 13365f757f3fSDimitry Andric 13375f757f3fSDimitry Andric if (isa<TruncInst>(EntryVal)) 13385f757f3fSDimitry Andric State.addMetadata(LastInduction, EntryVal); 13395f757f3fSDimitry Andric 13405f757f3fSDimitry Andric LastInduction = cast<Instruction>( 13415f757f3fSDimitry Andric Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")); 13425f757f3fSDimitry Andric LastInduction->setDebugLoc(EntryVal->getDebugLoc()); 13435f757f3fSDimitry Andric } 13445f757f3fSDimitry Andric 13455f757f3fSDimitry Andric LastInduction->setName("vec.ind.next"); 13465f757f3fSDimitry Andric VecInd->addIncoming(SteppedStart, VectorPH); 13475f757f3fSDimitry Andric // Add induction update using an incorrect block temporarily. The phi node 13485f757f3fSDimitry Andric // will be fixed after VPlan execution. Note that at this point the latch 13495f757f3fSDimitry Andric // block cannot be used, as it does not exist yet. 13505f757f3fSDimitry Andric // TODO: Model increment value in VPlan, by turning the recipe into a 13515f757f3fSDimitry Andric // multi-def and a subclass of VPHeaderPHIRecipe. 13525f757f3fSDimitry Andric VecInd->addIncoming(LastInduction, VectorPH); 13535f757f3fSDimitry Andric } 13545f757f3fSDimitry Andric 13555f757f3fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 135681ad6265SDimitry Andric void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, 135781ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 135881ad6265SDimitry Andric O << Indent << "WIDEN-INDUCTION"; 135981ad6265SDimitry Andric if (getTruncInst()) { 136081ad6265SDimitry Andric O << "\\l\""; 136181ad6265SDimitry Andric O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; 136281ad6265SDimitry Andric O << " +\n" << Indent << "\" "; 136381ad6265SDimitry Andric getVPValue(0)->printAsOperand(O, SlotTracker); 136481ad6265SDimitry Andric } else 136581ad6265SDimitry Andric O << " " << VPlanIngredient(IV); 136681ad6265SDimitry Andric 136781ad6265SDimitry Andric O << ", "; 136881ad6265SDimitry Andric getStepValue()->printAsOperand(O, SlotTracker); 136981ad6265SDimitry Andric } 137081ad6265SDimitry Andric #endif 137181ad6265SDimitry Andric 137281ad6265SDimitry Andric bool VPWidenIntOrFpInductionRecipe::isCanonical() const { 137306c3fb27SDimitry Andric // The step may be defined by a recipe in the preheader (e.g. if it requires 137406c3fb27SDimitry Andric // SCEV expansion), but for the canonical induction the step is required to be 137506c3fb27SDimitry Andric // 1, which is represented as live-in. 137606c3fb27SDimitry Andric if (getStepValue()->getDefiningRecipe()) 137706c3fb27SDimitry Andric return false; 137806c3fb27SDimitry Andric auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue()); 137981ad6265SDimitry Andric auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue()); 1380*0fca6ea1SDimitry Andric auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin()); 1381*0fca6ea1SDimitry Andric return StartC && StartC->isZero() && StepC && StepC->isOne() && 1382*0fca6ea1SDimitry Andric getScalarType() == CanIV->getScalarType(); 138381ad6265SDimitry Andric } 138481ad6265SDimitry Andric 1385bdd1243dSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1386bdd1243dSDimitry Andric void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent, 1387bdd1243dSDimitry Andric VPSlotTracker &SlotTracker) const { 1388bdd1243dSDimitry Andric O << Indent; 1389bdd1243dSDimitry Andric printAsOperand(O, SlotTracker); 1390bdd1243dSDimitry Andric O << Indent << "= DERIVED-IV "; 1391bdd1243dSDimitry Andric getStartValue()->printAsOperand(O, SlotTracker); 1392bdd1243dSDimitry Andric O << " + "; 1393*0fca6ea1SDimitry Andric getOperand(1)->printAsOperand(O, SlotTracker); 1394bdd1243dSDimitry Andric O << " * "; 1395bdd1243dSDimitry Andric getStepValue()->printAsOperand(O, SlotTracker); 139681ad6265SDimitry Andric } 1397bdd1243dSDimitry Andric #endif 139881ad6265SDimitry Andric 13995f757f3fSDimitry Andric void VPScalarIVStepsRecipe::execute(VPTransformState &State) { 14005f757f3fSDimitry Andric // Fast-math-flags propagate from the original induction instruction. 14015f757f3fSDimitry Andric IRBuilder<>::FastMathFlagGuard FMFG(State.Builder); 14025f757f3fSDimitry Andric if (hasFastMathFlags()) 14035f757f3fSDimitry Andric State.Builder.setFastMathFlags(getFastMathFlags()); 14045f757f3fSDimitry Andric 14055f757f3fSDimitry Andric /// Compute scalar induction steps. \p ScalarIV is the scalar induction 14065f757f3fSDimitry Andric /// variable on which to base the steps, \p Step is the size of the step. 14075f757f3fSDimitry Andric 14085f757f3fSDimitry Andric Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0)); 14095f757f3fSDimitry Andric Value *Step = State.get(getStepValue(), VPIteration(0, 0)); 14105f757f3fSDimitry Andric IRBuilderBase &Builder = State.Builder; 14115f757f3fSDimitry Andric 14125f757f3fSDimitry Andric // Ensure step has the same type as that of scalar IV. 14135f757f3fSDimitry Andric Type *BaseIVTy = BaseIV->getType()->getScalarType(); 1414*0fca6ea1SDimitry Andric assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!"); 14155f757f3fSDimitry Andric 14165f757f3fSDimitry Andric // We build scalar steps for both integer and floating-point induction 14175f757f3fSDimitry Andric // variables. Here, we determine the kind of arithmetic we will perform. 14185f757f3fSDimitry Andric Instruction::BinaryOps AddOp; 14195f757f3fSDimitry Andric Instruction::BinaryOps MulOp; 14205f757f3fSDimitry Andric if (BaseIVTy->isIntegerTy()) { 14215f757f3fSDimitry Andric AddOp = Instruction::Add; 14225f757f3fSDimitry Andric MulOp = Instruction::Mul; 14235f757f3fSDimitry Andric } else { 14245f757f3fSDimitry Andric AddOp = InductionOpcode; 14255f757f3fSDimitry Andric MulOp = Instruction::FMul; 14265f757f3fSDimitry Andric } 14275f757f3fSDimitry Andric 14285f757f3fSDimitry Andric // Determine the number of scalars we need to generate for each unroll 14295f757f3fSDimitry Andric // iteration. 14305f757f3fSDimitry Andric bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this); 14315f757f3fSDimitry Andric // Compute the scalar steps and save the results in State. 14325f757f3fSDimitry Andric Type *IntStepTy = 14335f757f3fSDimitry Andric IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits()); 14345f757f3fSDimitry Andric Type *VecIVTy = nullptr; 14355f757f3fSDimitry Andric Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr; 14365f757f3fSDimitry Andric if (!FirstLaneOnly && State.VF.isScalable()) { 14375f757f3fSDimitry Andric VecIVTy = VectorType::get(BaseIVTy, State.VF); 14385f757f3fSDimitry Andric UnitStepVec = 14395f757f3fSDimitry Andric Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF)); 14405f757f3fSDimitry Andric SplatStep = Builder.CreateVectorSplat(State.VF, Step); 14415f757f3fSDimitry Andric SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV); 14425f757f3fSDimitry Andric } 14435f757f3fSDimitry Andric 14445f757f3fSDimitry Andric unsigned StartPart = 0; 14455f757f3fSDimitry Andric unsigned EndPart = State.UF; 14465f757f3fSDimitry Andric unsigned StartLane = 0; 14475f757f3fSDimitry Andric unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue(); 14485f757f3fSDimitry Andric if (State.Instance) { 14495f757f3fSDimitry Andric StartPart = State.Instance->Part; 14505f757f3fSDimitry Andric EndPart = StartPart + 1; 14515f757f3fSDimitry Andric StartLane = State.Instance->Lane.getKnownLane(); 14525f757f3fSDimitry Andric EndLane = StartLane + 1; 14535f757f3fSDimitry Andric } 14545f757f3fSDimitry Andric for (unsigned Part = StartPart; Part < EndPart; ++Part) { 14555f757f3fSDimitry Andric Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part); 14565f757f3fSDimitry Andric 14575f757f3fSDimitry Andric if (!FirstLaneOnly && State.VF.isScalable()) { 14585f757f3fSDimitry Andric auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0); 14595f757f3fSDimitry Andric auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec); 14605f757f3fSDimitry Andric if (BaseIVTy->isFloatingPointTy()) 14615f757f3fSDimitry Andric InitVec = Builder.CreateSIToFP(InitVec, VecIVTy); 14625f757f3fSDimitry Andric auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep); 14635f757f3fSDimitry Andric auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul); 14645f757f3fSDimitry Andric State.set(this, Add, Part); 14655f757f3fSDimitry Andric // It's useful to record the lane values too for the known minimum number 14665f757f3fSDimitry Andric // of elements so we do those below. This improves the code quality when 14675f757f3fSDimitry Andric // trying to extract the first element, for example. 14685f757f3fSDimitry Andric } 14695f757f3fSDimitry Andric 14705f757f3fSDimitry Andric if (BaseIVTy->isFloatingPointTy()) 14715f757f3fSDimitry Andric StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy); 14725f757f3fSDimitry Andric 14735f757f3fSDimitry Andric for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) { 14745f757f3fSDimitry Andric Value *StartIdx = Builder.CreateBinOp( 14755f757f3fSDimitry Andric AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane)); 14765f757f3fSDimitry Andric // The step returned by `createStepForVF` is a runtime-evaluated value 14775f757f3fSDimitry Andric // when VF is scalable. Otherwise, it should be folded into a Constant. 14785f757f3fSDimitry Andric assert((State.VF.isScalable() || isa<Constant>(StartIdx)) && 14795f757f3fSDimitry Andric "Expected StartIdx to be folded to a constant when VF is not " 14805f757f3fSDimitry Andric "scalable"); 14815f757f3fSDimitry Andric auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step); 14825f757f3fSDimitry Andric auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul); 14835f757f3fSDimitry Andric State.set(this, Add, VPIteration(Part, Lane)); 14845f757f3fSDimitry Andric } 14855f757f3fSDimitry Andric } 14865f757f3fSDimitry Andric } 14875f757f3fSDimitry Andric 148881ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 148981ad6265SDimitry Andric void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent, 149081ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 149181ad6265SDimitry Andric O << Indent; 149281ad6265SDimitry Andric printAsOperand(O, SlotTracker); 14935f757f3fSDimitry Andric O << " = SCALAR-STEPS "; 149481ad6265SDimitry Andric printOperands(O, SlotTracker); 149581ad6265SDimitry Andric } 1496753f127fSDimitry Andric #endif 149781ad6265SDimitry Andric 1498753f127fSDimitry Andric void VPWidenGEPRecipe::execute(VPTransformState &State) { 149906c3fb27SDimitry Andric assert(State.VF.isVector() && "not widening"); 1500753f127fSDimitry Andric auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr()); 1501753f127fSDimitry Andric // Construct a vector GEP by widening the operands of the scalar GEP as 1502753f127fSDimitry Andric // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP 1503753f127fSDimitry Andric // results in a vector of pointers when at least one operand of the GEP 1504753f127fSDimitry Andric // is vector-typed. Thus, to keep the representation compact, we only use 1505753f127fSDimitry Andric // vector-typed operands for loop-varying values. 1506753f127fSDimitry Andric 150706c3fb27SDimitry Andric if (areAllOperandsInvariant()) { 1508753f127fSDimitry Andric // If we are vectorizing, but the GEP has only loop-invariant operands, 1509753f127fSDimitry Andric // the GEP we build (by only using vector-typed operands for 1510753f127fSDimitry Andric // loop-varying values) would be a scalar pointer. Thus, to ensure we 1511753f127fSDimitry Andric // produce a vector of pointers, we need to either arbitrarily pick an 1512753f127fSDimitry Andric // operand to broadcast, or broadcast a clone of the original GEP. 1513753f127fSDimitry Andric // Here, we broadcast a clone of the original. 1514753f127fSDimitry Andric // 1515753f127fSDimitry Andric // TODO: If at some point we decide to scalarize instructions having 1516753f127fSDimitry Andric // loop-invariant operands, this special case will no longer be 1517753f127fSDimitry Andric // required. We would add the scalarization decision to 1518753f127fSDimitry Andric // collectLoopScalars() and teach getVectorValue() to broadcast 1519753f127fSDimitry Andric // the lane-zero scalar value. 152006c3fb27SDimitry Andric SmallVector<Value *> Ops; 152106c3fb27SDimitry Andric for (unsigned I = 0, E = getNumOperands(); I != E; I++) 152206c3fb27SDimitry Andric Ops.push_back(State.get(getOperand(I), VPIteration(0, 0))); 152306c3fb27SDimitry Andric 152406c3fb27SDimitry Andric auto *NewGEP = 152506c3fb27SDimitry Andric State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0], 152606c3fb27SDimitry Andric ArrayRef(Ops).drop_front(), "", isInBounds()); 1527753f127fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) { 152806c3fb27SDimitry Andric Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP); 1529753f127fSDimitry Andric State.set(this, EntryPart, Part); 1530753f127fSDimitry Andric State.addMetadata(EntryPart, GEP); 1531753f127fSDimitry Andric } 1532753f127fSDimitry Andric } else { 1533753f127fSDimitry Andric // If the GEP has at least one loop-varying operand, we are sure to 1534753f127fSDimitry Andric // produce a vector of pointers. But if we are only unrolling, we want 1535753f127fSDimitry Andric // to produce a scalar GEP for each unroll part. Thus, the GEP we 1536753f127fSDimitry Andric // produce with the code below will be scalar (if VF == 1) or vector 1537753f127fSDimitry Andric // (otherwise). Note that for the unroll-only case, we still maintain 1538753f127fSDimitry Andric // values in the vector mapping with initVector, as we do for other 1539753f127fSDimitry Andric // instructions. 1540753f127fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) { 1541753f127fSDimitry Andric // The pointer operand of the new GEP. If it's loop-invariant, we 1542753f127fSDimitry Andric // won't broadcast it. 154306c3fb27SDimitry Andric auto *Ptr = isPointerLoopInvariant() 1544753f127fSDimitry Andric ? State.get(getOperand(0), VPIteration(0, 0)) 1545753f127fSDimitry Andric : State.get(getOperand(0), Part); 1546753f127fSDimitry Andric 1547753f127fSDimitry Andric // Collect all the indices for the new GEP. If any index is 1548753f127fSDimitry Andric // loop-invariant, we won't broadcast it. 1549753f127fSDimitry Andric SmallVector<Value *, 4> Indices; 1550753f127fSDimitry Andric for (unsigned I = 1, E = getNumOperands(); I < E; I++) { 1551753f127fSDimitry Andric VPValue *Operand = getOperand(I); 155206c3fb27SDimitry Andric if (isIndexLoopInvariant(I - 1)) 1553753f127fSDimitry Andric Indices.push_back(State.get(Operand, VPIteration(0, 0))); 1554753f127fSDimitry Andric else 1555753f127fSDimitry Andric Indices.push_back(State.get(Operand, Part)); 1556753f127fSDimitry Andric } 1557753f127fSDimitry Andric 1558753f127fSDimitry Andric // Create the new GEP. Note that this GEP may be a scalar if VF == 1, 1559753f127fSDimitry Andric // but it should be a vector, otherwise. 1560753f127fSDimitry Andric auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr, 156106c3fb27SDimitry Andric Indices, "", isInBounds()); 1562753f127fSDimitry Andric assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && 1563753f127fSDimitry Andric "NewGEP is not a pointer vector"); 1564753f127fSDimitry Andric State.set(this, NewGEP, Part); 1565753f127fSDimitry Andric State.addMetadata(NewGEP, GEP); 1566753f127fSDimitry Andric } 1567753f127fSDimitry Andric } 1568753f127fSDimitry Andric } 1569753f127fSDimitry Andric 1570753f127fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 157181ad6265SDimitry Andric void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, 157281ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 157381ad6265SDimitry Andric O << Indent << "WIDEN-GEP "; 157406c3fb27SDimitry Andric O << (isPointerLoopInvariant() ? "Inv" : "Var"); 157506c3fb27SDimitry Andric for (size_t I = 0; I < getNumOperands() - 1; ++I) 157606c3fb27SDimitry Andric O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]"; 157781ad6265SDimitry Andric 157881ad6265SDimitry Andric O << " "; 157981ad6265SDimitry Andric printAsOperand(O, SlotTracker); 158081ad6265SDimitry Andric O << " = getelementptr"; 158106c3fb27SDimitry Andric printFlags(O); 158281ad6265SDimitry Andric printOperands(O, SlotTracker); 158381ad6265SDimitry Andric } 1584753f127fSDimitry Andric #endif 158581ad6265SDimitry Andric 1586647cbc5dSDimitry Andric void VPVectorPointerRecipe ::execute(VPTransformState &State) { 1587647cbc5dSDimitry Andric auto &Builder = State.Builder; 1588647cbc5dSDimitry Andric State.setDebugLocFrom(getDebugLoc()); 1589647cbc5dSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) { 1590647cbc5dSDimitry Andric // Calculate the pointer for the specific unroll-part. 1591647cbc5dSDimitry Andric Value *PartPtr = nullptr; 1592647cbc5dSDimitry Andric // Use i32 for the gep index type when the value is constant, 1593647cbc5dSDimitry Andric // or query DataLayout for a more suitable index type otherwise. 1594647cbc5dSDimitry Andric const DataLayout &DL = 1595*0fca6ea1SDimitry Andric Builder.GetInsertBlock()->getDataLayout(); 1596647cbc5dSDimitry Andric Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0) 1597647cbc5dSDimitry Andric ? DL.getIndexType(IndexedTy->getPointerTo()) 1598647cbc5dSDimitry Andric : Builder.getInt32Ty(); 1599647cbc5dSDimitry Andric Value *Ptr = State.get(getOperand(0), VPIteration(0, 0)); 16001db9f3b2SDimitry Andric bool InBounds = isInBounds(); 1601647cbc5dSDimitry Andric if (IsReverse) { 1602647cbc5dSDimitry Andric // If the address is consecutive but reversed, then the 1603647cbc5dSDimitry Andric // wide store needs to start at the last vector element. 1604647cbc5dSDimitry Andric // RunTimeVF = VScale * VF.getKnownMinValue() 1605647cbc5dSDimitry Andric // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue() 1606647cbc5dSDimitry Andric Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF); 1607647cbc5dSDimitry Andric // NumElt = -Part * RunTimeVF 1608647cbc5dSDimitry Andric Value *NumElt = Builder.CreateMul( 1609647cbc5dSDimitry Andric ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF); 1610647cbc5dSDimitry Andric // LastLane = 1 - RunTimeVF 1611647cbc5dSDimitry Andric Value *LastLane = 1612647cbc5dSDimitry Andric Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF); 1613647cbc5dSDimitry Andric PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds); 1614647cbc5dSDimitry Andric PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds); 1615647cbc5dSDimitry Andric } else { 1616647cbc5dSDimitry Andric Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part); 1617647cbc5dSDimitry Andric PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds); 1618647cbc5dSDimitry Andric } 1619647cbc5dSDimitry Andric 1620*0fca6ea1SDimitry Andric State.set(this, PartPtr, Part, /*IsScalar*/ true); 1621647cbc5dSDimitry Andric } 1622647cbc5dSDimitry Andric } 1623647cbc5dSDimitry Andric 1624647cbc5dSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1625647cbc5dSDimitry Andric void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent, 1626647cbc5dSDimitry Andric VPSlotTracker &SlotTracker) const { 1627647cbc5dSDimitry Andric O << Indent; 1628647cbc5dSDimitry Andric printAsOperand(O, SlotTracker); 1629647cbc5dSDimitry Andric O << " = vector-pointer "; 1630647cbc5dSDimitry Andric if (IsReverse) 1631647cbc5dSDimitry Andric O << "(reverse) "; 1632647cbc5dSDimitry Andric 1633647cbc5dSDimitry Andric printOperands(O, SlotTracker); 1634647cbc5dSDimitry Andric } 1635647cbc5dSDimitry Andric #endif 1636647cbc5dSDimitry Andric 1637753f127fSDimitry Andric void VPBlendRecipe::execute(VPTransformState &State) { 16385f757f3fSDimitry Andric State.setDebugLocFrom(getDebugLoc()); 1639753f127fSDimitry Andric // We know that all PHIs in non-header blocks are converted into 1640753f127fSDimitry Andric // selects, so we don't have to worry about the insertion order and we 1641753f127fSDimitry Andric // can just use the builder. 1642753f127fSDimitry Andric // At this point we generate the predication tree. There may be 1643753f127fSDimitry Andric // duplications since this is a simple recursive scan, but future 1644753f127fSDimitry Andric // optimizations will clean it up. 1645753f127fSDimitry Andric 1646753f127fSDimitry Andric unsigned NumIncoming = getNumIncomingValues(); 1647753f127fSDimitry Andric 1648753f127fSDimitry Andric // Generate a sequence of selects of the form: 1649753f127fSDimitry Andric // SELECT(Mask3, In3, 1650753f127fSDimitry Andric // SELECT(Mask2, In2, 1651753f127fSDimitry Andric // SELECT(Mask1, In1, 1652753f127fSDimitry Andric // In0))) 1653753f127fSDimitry Andric // Note that Mask0 is never used: lanes for which no path reaches this phi and 1654753f127fSDimitry Andric // are essentially undef are taken from In0. 1655753f127fSDimitry Andric VectorParts Entry(State.UF); 1656*0fca6ea1SDimitry Andric bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); 1657753f127fSDimitry Andric for (unsigned In = 0; In < NumIncoming; ++In) { 1658753f127fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) { 1659753f127fSDimitry Andric // We might have single edge PHIs (blocks) - use an identity 1660753f127fSDimitry Andric // 'select' for the first PHI operand. 1661*0fca6ea1SDimitry Andric Value *In0 = State.get(getIncomingValue(In), Part, OnlyFirstLaneUsed); 1662753f127fSDimitry Andric if (In == 0) 1663753f127fSDimitry Andric Entry[Part] = In0; // Initialize with the first incoming value. 1664753f127fSDimitry Andric else { 1665753f127fSDimitry Andric // Select between the current value and the previous incoming edge 1666753f127fSDimitry Andric // based on the incoming mask. 1667*0fca6ea1SDimitry Andric Value *Cond = State.get(getMask(In), Part, OnlyFirstLaneUsed); 1668753f127fSDimitry Andric Entry[Part] = 1669753f127fSDimitry Andric State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi"); 1670753f127fSDimitry Andric } 1671753f127fSDimitry Andric } 1672753f127fSDimitry Andric } 1673753f127fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) 1674*0fca6ea1SDimitry Andric State.set(this, Entry[Part], Part, OnlyFirstLaneUsed); 1675753f127fSDimitry Andric } 1676753f127fSDimitry Andric 1677753f127fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 167881ad6265SDimitry Andric void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, 167981ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 168081ad6265SDimitry Andric O << Indent << "BLEND "; 16815f757f3fSDimitry Andric printAsOperand(O, SlotTracker); 168281ad6265SDimitry Andric O << " ="; 168381ad6265SDimitry Andric if (getNumIncomingValues() == 1) { 168481ad6265SDimitry Andric // Not a User of any mask: not really blending, this is a 168581ad6265SDimitry Andric // single-predecessor phi. 168681ad6265SDimitry Andric O << " "; 168781ad6265SDimitry Andric getIncomingValue(0)->printAsOperand(O, SlotTracker); 168881ad6265SDimitry Andric } else { 168981ad6265SDimitry Andric for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) { 169081ad6265SDimitry Andric O << " "; 169181ad6265SDimitry Andric getIncomingValue(I)->printAsOperand(O, SlotTracker); 1692*0fca6ea1SDimitry Andric if (I == 0) 1693*0fca6ea1SDimitry Andric continue; 169481ad6265SDimitry Andric O << "/"; 169581ad6265SDimitry Andric getMask(I)->printAsOperand(O, SlotTracker); 169681ad6265SDimitry Andric } 169781ad6265SDimitry Andric } 169881ad6265SDimitry Andric } 1699*0fca6ea1SDimitry Andric #endif 170081ad6265SDimitry Andric 1701*0fca6ea1SDimitry Andric void VPReductionRecipe::execute(VPTransformState &State) { 1702*0fca6ea1SDimitry Andric assert(!State.Instance && "Reduction being replicated."); 1703*0fca6ea1SDimitry Andric Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true); 1704*0fca6ea1SDimitry Andric RecurKind Kind = RdxDesc.getRecurrenceKind(); 1705*0fca6ea1SDimitry Andric // Propagate the fast-math flags carried by the underlying instruction. 1706*0fca6ea1SDimitry Andric IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); 1707*0fca6ea1SDimitry Andric State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); 1708*0fca6ea1SDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) { 1709*0fca6ea1SDimitry Andric Value *NewVecOp = State.get(getVecOp(), Part); 1710*0fca6ea1SDimitry Andric if (VPValue *Cond = getCondOp()) { 1711*0fca6ea1SDimitry Andric Value *NewCond = State.get(Cond, Part, State.VF.isScalar()); 1712*0fca6ea1SDimitry Andric VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType()); 1713*0fca6ea1SDimitry Andric Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType(); 1714*0fca6ea1SDimitry Andric Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy, 1715*0fca6ea1SDimitry Andric RdxDesc.getFastMathFlags()); 1716*0fca6ea1SDimitry Andric if (State.VF.isVector()) { 1717*0fca6ea1SDimitry Andric Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden); 1718*0fca6ea1SDimitry Andric } 1719*0fca6ea1SDimitry Andric 1720*0fca6ea1SDimitry Andric Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden); 1721*0fca6ea1SDimitry Andric NewVecOp = Select; 1722*0fca6ea1SDimitry Andric } 1723*0fca6ea1SDimitry Andric Value *NewRed; 1724*0fca6ea1SDimitry Andric Value *NextInChain; 1725*0fca6ea1SDimitry Andric if (IsOrdered) { 1726*0fca6ea1SDimitry Andric if (State.VF.isVector()) 1727*0fca6ea1SDimitry Andric NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp, 1728*0fca6ea1SDimitry Andric PrevInChain); 1729*0fca6ea1SDimitry Andric else 1730*0fca6ea1SDimitry Andric NewRed = State.Builder.CreateBinOp( 1731*0fca6ea1SDimitry Andric (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain, 1732*0fca6ea1SDimitry Andric NewVecOp); 1733*0fca6ea1SDimitry Andric PrevInChain = NewRed; 1734*0fca6ea1SDimitry Andric } else { 1735*0fca6ea1SDimitry Andric PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true); 1736*0fca6ea1SDimitry Andric NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp); 1737*0fca6ea1SDimitry Andric } 1738*0fca6ea1SDimitry Andric if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) { 1739*0fca6ea1SDimitry Andric NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(), 1740*0fca6ea1SDimitry Andric NewRed, PrevInChain); 1741*0fca6ea1SDimitry Andric } else if (IsOrdered) 1742*0fca6ea1SDimitry Andric NextInChain = NewRed; 1743*0fca6ea1SDimitry Andric else 1744*0fca6ea1SDimitry Andric NextInChain = State.Builder.CreateBinOp( 1745*0fca6ea1SDimitry Andric (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain); 1746*0fca6ea1SDimitry Andric State.set(this, NextInChain, Part, /*IsScalar*/ true); 1747*0fca6ea1SDimitry Andric } 1748*0fca6ea1SDimitry Andric } 1749*0fca6ea1SDimitry Andric 1750*0fca6ea1SDimitry Andric void VPReductionEVLRecipe::execute(VPTransformState &State) { 1751*0fca6ea1SDimitry Andric assert(!State.Instance && "Reduction being replicated."); 1752*0fca6ea1SDimitry Andric assert(State.UF == 1 && 1753*0fca6ea1SDimitry Andric "Expected only UF == 1 when vectorizing with explicit vector length."); 1754*0fca6ea1SDimitry Andric 1755*0fca6ea1SDimitry Andric auto &Builder = State.Builder; 1756*0fca6ea1SDimitry Andric // Propagate the fast-math flags carried by the underlying instruction. 1757*0fca6ea1SDimitry Andric IRBuilderBase::FastMathFlagGuard FMFGuard(Builder); 1758*0fca6ea1SDimitry Andric const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor(); 1759*0fca6ea1SDimitry Andric Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); 1760*0fca6ea1SDimitry Andric 1761*0fca6ea1SDimitry Andric RecurKind Kind = RdxDesc.getRecurrenceKind(); 1762*0fca6ea1SDimitry Andric Value *Prev = State.get(getChainOp(), 0, /*IsScalar*/ true); 1763*0fca6ea1SDimitry Andric Value *VecOp = State.get(getVecOp(), 0); 1764*0fca6ea1SDimitry Andric Value *EVL = State.get(getEVL(), VPIteration(0, 0)); 1765*0fca6ea1SDimitry Andric 1766*0fca6ea1SDimitry Andric VectorBuilder VBuilder(Builder); 1767*0fca6ea1SDimitry Andric VBuilder.setEVL(EVL); 1768*0fca6ea1SDimitry Andric Value *Mask; 1769*0fca6ea1SDimitry Andric // TODO: move the all-true mask generation into VectorBuilder. 1770*0fca6ea1SDimitry Andric if (VPValue *CondOp = getCondOp()) 1771*0fca6ea1SDimitry Andric Mask = State.get(CondOp, 0); 1772*0fca6ea1SDimitry Andric else 1773*0fca6ea1SDimitry Andric Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue()); 1774*0fca6ea1SDimitry Andric VBuilder.setMask(Mask); 1775*0fca6ea1SDimitry Andric 1776*0fca6ea1SDimitry Andric Value *NewRed; 1777*0fca6ea1SDimitry Andric if (isOrdered()) { 1778*0fca6ea1SDimitry Andric NewRed = createOrderedReduction(VBuilder, RdxDesc, VecOp, Prev); 1779*0fca6ea1SDimitry Andric } else { 1780*0fca6ea1SDimitry Andric NewRed = createSimpleTargetReduction(VBuilder, VecOp, RdxDesc); 1781*0fca6ea1SDimitry Andric if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) 1782*0fca6ea1SDimitry Andric NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev); 1783*0fca6ea1SDimitry Andric else 1784*0fca6ea1SDimitry Andric NewRed = Builder.CreateBinOp( 1785*0fca6ea1SDimitry Andric (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, Prev); 1786*0fca6ea1SDimitry Andric } 1787*0fca6ea1SDimitry Andric State.set(this, NewRed, 0, /*IsScalar*/ true); 1788*0fca6ea1SDimitry Andric } 1789*0fca6ea1SDimitry Andric 1790*0fca6ea1SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 179181ad6265SDimitry Andric void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, 179281ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 179381ad6265SDimitry Andric O << Indent << "REDUCE "; 179481ad6265SDimitry Andric printAsOperand(O, SlotTracker); 179581ad6265SDimitry Andric O << " = "; 179681ad6265SDimitry Andric getChainOp()->printAsOperand(O, SlotTracker); 179781ad6265SDimitry Andric O << " +"; 179881ad6265SDimitry Andric if (isa<FPMathOperator>(getUnderlyingInstr())) 179981ad6265SDimitry Andric O << getUnderlyingInstr()->getFastMathFlags(); 18005f757f3fSDimitry Andric O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " ("; 180181ad6265SDimitry Andric getVecOp()->printAsOperand(O, SlotTracker); 1802*0fca6ea1SDimitry Andric if (isConditional()) { 1803*0fca6ea1SDimitry Andric O << ", "; 1804*0fca6ea1SDimitry Andric getCondOp()->printAsOperand(O, SlotTracker); 1805*0fca6ea1SDimitry Andric } 1806*0fca6ea1SDimitry Andric O << ")"; 1807*0fca6ea1SDimitry Andric if (RdxDesc.IntermediateStore) 1808*0fca6ea1SDimitry Andric O << " (with final reduction value stored in invariant address sank " 1809*0fca6ea1SDimitry Andric "outside of loop)"; 1810*0fca6ea1SDimitry Andric } 1811*0fca6ea1SDimitry Andric 1812*0fca6ea1SDimitry Andric void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent, 1813*0fca6ea1SDimitry Andric VPSlotTracker &SlotTracker) const { 1814*0fca6ea1SDimitry Andric const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor(); 1815*0fca6ea1SDimitry Andric O << Indent << "REDUCE "; 1816*0fca6ea1SDimitry Andric printAsOperand(O, SlotTracker); 1817*0fca6ea1SDimitry Andric O << " = "; 1818*0fca6ea1SDimitry Andric getChainOp()->printAsOperand(O, SlotTracker); 1819*0fca6ea1SDimitry Andric O << " +"; 1820*0fca6ea1SDimitry Andric if (isa<FPMathOperator>(getUnderlyingInstr())) 1821*0fca6ea1SDimitry Andric O << getUnderlyingInstr()->getFastMathFlags(); 1822*0fca6ea1SDimitry Andric O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " ("; 1823*0fca6ea1SDimitry Andric getVecOp()->printAsOperand(O, SlotTracker); 1824*0fca6ea1SDimitry Andric O << ", "; 1825*0fca6ea1SDimitry Andric getEVL()->printAsOperand(O, SlotTracker); 1826*0fca6ea1SDimitry Andric if (isConditional()) { 182781ad6265SDimitry Andric O << ", "; 182881ad6265SDimitry Andric getCondOp()->printAsOperand(O, SlotTracker); 182981ad6265SDimitry Andric } 183081ad6265SDimitry Andric O << ")"; 18315f757f3fSDimitry Andric if (RdxDesc.IntermediateStore) 183281ad6265SDimitry Andric O << " (with final reduction value stored in invariant address sank " 183381ad6265SDimitry Andric "outside of loop)"; 183481ad6265SDimitry Andric } 183506c3fb27SDimitry Andric #endif 183681ad6265SDimitry Andric 183706c3fb27SDimitry Andric bool VPReplicateRecipe::shouldPack() const { 183806c3fb27SDimitry Andric // Find if the recipe is used by a widened recipe via an intervening 183906c3fb27SDimitry Andric // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector. 184006c3fb27SDimitry Andric return any_of(users(), [](const VPUser *U) { 184106c3fb27SDimitry Andric if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U)) 184206c3fb27SDimitry Andric return any_of(PredR->users(), [PredR](const VPUser *U) { 184306c3fb27SDimitry Andric return !U->usesScalars(PredR); 184406c3fb27SDimitry Andric }); 184506c3fb27SDimitry Andric return false; 184606c3fb27SDimitry Andric }); 184706c3fb27SDimitry Andric } 184806c3fb27SDimitry Andric 184906c3fb27SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 185081ad6265SDimitry Andric void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, 185181ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 185281ad6265SDimitry Andric O << Indent << (IsUniform ? "CLONE " : "REPLICATE "); 185381ad6265SDimitry Andric 185481ad6265SDimitry Andric if (!getUnderlyingInstr()->getType()->isVoidTy()) { 185581ad6265SDimitry Andric printAsOperand(O, SlotTracker); 185681ad6265SDimitry Andric O << " = "; 185781ad6265SDimitry Andric } 185881ad6265SDimitry Andric if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) { 185906c3fb27SDimitry Andric O << "call"; 186006c3fb27SDimitry Andric printFlags(O); 186106c3fb27SDimitry Andric O << "@" << CB->getCalledFunction()->getName() << "("; 186281ad6265SDimitry Andric interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)), 186381ad6265SDimitry Andric O, [&O, &SlotTracker](VPValue *Op) { 186481ad6265SDimitry Andric Op->printAsOperand(O, SlotTracker); 186581ad6265SDimitry Andric }); 186681ad6265SDimitry Andric O << ")"; 186781ad6265SDimitry Andric } else { 186806c3fb27SDimitry Andric O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()); 186906c3fb27SDimitry Andric printFlags(O); 187081ad6265SDimitry Andric printOperands(O, SlotTracker); 187181ad6265SDimitry Andric } 187281ad6265SDimitry Andric 187306c3fb27SDimitry Andric if (shouldPack()) 187481ad6265SDimitry Andric O << " (S->V)"; 187581ad6265SDimitry Andric } 1876753f127fSDimitry Andric #endif 187781ad6265SDimitry Andric 1878*0fca6ea1SDimitry Andric /// Checks if \p C is uniform across all VFs and UFs. It is considered as such 1879*0fca6ea1SDimitry Andric /// if it is either defined outside the vector region or its operand is known to 1880*0fca6ea1SDimitry Andric /// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI). 1881*0fca6ea1SDimitry Andric /// TODO: Uniformity should be associated with a VPValue and there should be a 1882*0fca6ea1SDimitry Andric /// generic way to check. 1883*0fca6ea1SDimitry Andric static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) { 1884*0fca6ea1SDimitry Andric return C->isDefinedOutsideVectorRegions() || 1885*0fca6ea1SDimitry Andric isa<VPDerivedIVRecipe>(C->getOperand(0)) || 1886*0fca6ea1SDimitry Andric isa<VPCanonicalIVPHIRecipe>(C->getOperand(0)); 1887*0fca6ea1SDimitry Andric } 1888*0fca6ea1SDimitry Andric 1889*0fca6ea1SDimitry Andric Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) { 1890*0fca6ea1SDimitry Andric assert(vputils::onlyFirstLaneUsed(this) && 1891*0fca6ea1SDimitry Andric "Codegen only implemented for first lane."); 1892*0fca6ea1SDimitry Andric switch (Opcode) { 1893*0fca6ea1SDimitry Andric case Instruction::SExt: 1894*0fca6ea1SDimitry Andric case Instruction::ZExt: 1895*0fca6ea1SDimitry Andric case Instruction::Trunc: { 1896*0fca6ea1SDimitry Andric // Note: SExt/ZExt not used yet. 1897*0fca6ea1SDimitry Andric Value *Op = State.get(getOperand(0), VPIteration(Part, 0)); 1898*0fca6ea1SDimitry Andric return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy); 1899*0fca6ea1SDimitry Andric } 1900*0fca6ea1SDimitry Andric default: 1901*0fca6ea1SDimitry Andric llvm_unreachable("opcode not implemented yet"); 1902*0fca6ea1SDimitry Andric } 1903*0fca6ea1SDimitry Andric } 1904*0fca6ea1SDimitry Andric 1905*0fca6ea1SDimitry Andric void VPScalarCastRecipe ::execute(VPTransformState &State) { 1906*0fca6ea1SDimitry Andric bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this); 1907*0fca6ea1SDimitry Andric for (unsigned Part = 0; Part != State.UF; ++Part) { 1908*0fca6ea1SDimitry Andric Value *Res; 1909*0fca6ea1SDimitry Andric // Only generate a single instance, if the recipe is uniform across UFs and 1910*0fca6ea1SDimitry Andric // VFs. 1911*0fca6ea1SDimitry Andric if (Part > 0 && IsUniformAcrossVFsAndUFs) 1912*0fca6ea1SDimitry Andric Res = State.get(this, VPIteration(0, 0)); 1913*0fca6ea1SDimitry Andric else 1914*0fca6ea1SDimitry Andric Res = generate(State, Part); 1915*0fca6ea1SDimitry Andric State.set(this, Res, VPIteration(Part, 0)); 1916*0fca6ea1SDimitry Andric } 1917*0fca6ea1SDimitry Andric } 1918*0fca6ea1SDimitry Andric 1919*0fca6ea1SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1920*0fca6ea1SDimitry Andric void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent, 1921*0fca6ea1SDimitry Andric VPSlotTracker &SlotTracker) const { 1922*0fca6ea1SDimitry Andric O << Indent << "SCALAR-CAST "; 1923*0fca6ea1SDimitry Andric printAsOperand(O, SlotTracker); 1924*0fca6ea1SDimitry Andric O << " = " << Instruction::getOpcodeName(Opcode) << " "; 1925*0fca6ea1SDimitry Andric printOperands(O, SlotTracker); 1926*0fca6ea1SDimitry Andric O << " to " << *ResultTy; 1927*0fca6ea1SDimitry Andric } 1928*0fca6ea1SDimitry Andric #endif 1929*0fca6ea1SDimitry Andric 1930753f127fSDimitry Andric void VPBranchOnMaskRecipe::execute(VPTransformState &State) { 1931753f127fSDimitry Andric assert(State.Instance && "Branch on Mask works only on single instance."); 1932753f127fSDimitry Andric 1933753f127fSDimitry Andric unsigned Part = State.Instance->Part; 1934753f127fSDimitry Andric unsigned Lane = State.Instance->Lane.getKnownLane(); 1935753f127fSDimitry Andric 1936753f127fSDimitry Andric Value *ConditionBit = nullptr; 1937753f127fSDimitry Andric VPValue *BlockInMask = getMask(); 1938753f127fSDimitry Andric if (BlockInMask) { 1939753f127fSDimitry Andric ConditionBit = State.get(BlockInMask, Part); 1940753f127fSDimitry Andric if (ConditionBit->getType()->isVectorTy()) 1941753f127fSDimitry Andric ConditionBit = State.Builder.CreateExtractElement( 1942753f127fSDimitry Andric ConditionBit, State.Builder.getInt32(Lane)); 1943753f127fSDimitry Andric } else // Block in mask is all-one. 1944753f127fSDimitry Andric ConditionBit = State.Builder.getTrue(); 1945753f127fSDimitry Andric 1946753f127fSDimitry Andric // Replace the temporary unreachable terminator with a new conditional branch, 1947753f127fSDimitry Andric // whose two destinations will be set later when they are created. 1948753f127fSDimitry Andric auto *CurrentTerminator = State.CFG.PrevBB->getTerminator(); 1949753f127fSDimitry Andric assert(isa<UnreachableInst>(CurrentTerminator) && 1950753f127fSDimitry Andric "Expected to replace unreachable terminator with conditional branch."); 1951753f127fSDimitry Andric auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit); 1952753f127fSDimitry Andric CondBr->setSuccessor(0, nullptr); 1953753f127fSDimitry Andric ReplaceInstWithInst(CurrentTerminator, CondBr); 1954753f127fSDimitry Andric } 1955753f127fSDimitry Andric 1956fcaf7f86SDimitry Andric void VPPredInstPHIRecipe::execute(VPTransformState &State) { 1957fcaf7f86SDimitry Andric assert(State.Instance && "Predicated instruction PHI works per instance."); 1958fcaf7f86SDimitry Andric Instruction *ScalarPredInst = 1959fcaf7f86SDimitry Andric cast<Instruction>(State.get(getOperand(0), *State.Instance)); 1960fcaf7f86SDimitry Andric BasicBlock *PredicatedBB = ScalarPredInst->getParent(); 1961fcaf7f86SDimitry Andric BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); 1962fcaf7f86SDimitry Andric assert(PredicatingBB && "Predicated block has no single predecessor."); 1963fcaf7f86SDimitry Andric assert(isa<VPReplicateRecipe>(getOperand(0)) && 1964fcaf7f86SDimitry Andric "operand must be VPReplicateRecipe"); 1965fcaf7f86SDimitry Andric 1966fcaf7f86SDimitry Andric // By current pack/unpack logic we need to generate only a single phi node: if 1967fcaf7f86SDimitry Andric // a vector value for the predicated instruction exists at this point it means 1968fcaf7f86SDimitry Andric // the instruction has vector users only, and a phi for the vector value is 1969fcaf7f86SDimitry Andric // needed. In this case the recipe of the predicated instruction is marked to 1970fcaf7f86SDimitry Andric // also do that packing, thereby "hoisting" the insert-element sequence. 1971fcaf7f86SDimitry Andric // Otherwise, a phi node for the scalar value is needed. 1972fcaf7f86SDimitry Andric unsigned Part = State.Instance->Part; 1973fcaf7f86SDimitry Andric if (State.hasVectorValue(getOperand(0), Part)) { 1974fcaf7f86SDimitry Andric Value *VectorValue = State.get(getOperand(0), Part); 1975fcaf7f86SDimitry Andric InsertElementInst *IEI = cast<InsertElementInst>(VectorValue); 1976fcaf7f86SDimitry Andric PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2); 1977fcaf7f86SDimitry Andric VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector. 1978fcaf7f86SDimitry Andric VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element. 1979fcaf7f86SDimitry Andric if (State.hasVectorValue(this, Part)) 1980fcaf7f86SDimitry Andric State.reset(this, VPhi, Part); 1981fcaf7f86SDimitry Andric else 1982fcaf7f86SDimitry Andric State.set(this, VPhi, Part); 1983fcaf7f86SDimitry Andric // NOTE: Currently we need to update the value of the operand, so the next 1984fcaf7f86SDimitry Andric // predicated iteration inserts its generated value in the correct vector. 1985fcaf7f86SDimitry Andric State.reset(getOperand(0), VPhi, Part); 1986fcaf7f86SDimitry Andric } else { 1987fcaf7f86SDimitry Andric Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType(); 1988fcaf7f86SDimitry Andric PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2); 1989fcaf7f86SDimitry Andric Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()), 1990fcaf7f86SDimitry Andric PredicatingBB); 1991fcaf7f86SDimitry Andric Phi->addIncoming(ScalarPredInst, PredicatedBB); 1992fcaf7f86SDimitry Andric if (State.hasScalarValue(this, *State.Instance)) 1993fcaf7f86SDimitry Andric State.reset(this, Phi, *State.Instance); 1994fcaf7f86SDimitry Andric else 1995fcaf7f86SDimitry Andric State.set(this, Phi, *State.Instance); 1996fcaf7f86SDimitry Andric // NOTE: Currently we need to update the value of the operand, so the next 1997fcaf7f86SDimitry Andric // predicated iteration inserts its generated value in the correct vector. 1998fcaf7f86SDimitry Andric State.reset(getOperand(0), Phi, *State.Instance); 1999fcaf7f86SDimitry Andric } 2000fcaf7f86SDimitry Andric } 2001fcaf7f86SDimitry Andric 2002753f127fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 200381ad6265SDimitry Andric void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, 200481ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 200581ad6265SDimitry Andric O << Indent << "PHI-PREDICATED-INSTRUCTION "; 200681ad6265SDimitry Andric printAsOperand(O, SlotTracker); 200781ad6265SDimitry Andric O << " = "; 200881ad6265SDimitry Andric printOperands(O, SlotTracker); 200981ad6265SDimitry Andric } 201081ad6265SDimitry Andric 2011*0fca6ea1SDimitry Andric void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent, 201281ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 201381ad6265SDimitry Andric O << Indent << "WIDEN "; 2014*0fca6ea1SDimitry Andric printAsOperand(O, SlotTracker); 2015*0fca6ea1SDimitry Andric O << " = load "; 201681ad6265SDimitry Andric printOperands(O, SlotTracker); 201781ad6265SDimitry Andric } 2018*0fca6ea1SDimitry Andric 2019*0fca6ea1SDimitry Andric void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent, 2020*0fca6ea1SDimitry Andric VPSlotTracker &SlotTracker) const { 2021*0fca6ea1SDimitry Andric O << Indent << "WIDEN "; 2022*0fca6ea1SDimitry Andric printAsOperand(O, SlotTracker); 2023*0fca6ea1SDimitry Andric O << " = vp.load "; 2024*0fca6ea1SDimitry Andric printOperands(O, SlotTracker); 2025*0fca6ea1SDimitry Andric } 2026*0fca6ea1SDimitry Andric 2027*0fca6ea1SDimitry Andric void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent, 2028*0fca6ea1SDimitry Andric VPSlotTracker &SlotTracker) const { 2029*0fca6ea1SDimitry Andric O << Indent << "WIDEN store "; 2030*0fca6ea1SDimitry Andric printOperands(O, SlotTracker); 2031*0fca6ea1SDimitry Andric } 2032*0fca6ea1SDimitry Andric 2033*0fca6ea1SDimitry Andric void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent, 2034*0fca6ea1SDimitry Andric VPSlotTracker &SlotTracker) const { 2035*0fca6ea1SDimitry Andric O << Indent << "WIDEN vp.store "; 2036*0fca6ea1SDimitry Andric printOperands(O, SlotTracker); 2037*0fca6ea1SDimitry Andric } 2038*0fca6ea1SDimitry Andric #endif 2039*0fca6ea1SDimitry Andric 2040*0fca6ea1SDimitry Andric static Value *createBitOrPointerCast(IRBuilderBase &Builder, Value *V, 2041*0fca6ea1SDimitry Andric VectorType *DstVTy, const DataLayout &DL) { 2042*0fca6ea1SDimitry Andric // Verify that V is a vector type with same number of elements as DstVTy. 2043*0fca6ea1SDimitry Andric auto VF = DstVTy->getElementCount(); 2044*0fca6ea1SDimitry Andric auto *SrcVecTy = cast<VectorType>(V->getType()); 2045*0fca6ea1SDimitry Andric assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match"); 2046*0fca6ea1SDimitry Andric Type *SrcElemTy = SrcVecTy->getElementType(); 2047*0fca6ea1SDimitry Andric Type *DstElemTy = DstVTy->getElementType(); 2048*0fca6ea1SDimitry Andric assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) && 2049*0fca6ea1SDimitry Andric "Vector elements must have same size"); 2050*0fca6ea1SDimitry Andric 2051*0fca6ea1SDimitry Andric // Do a direct cast if element types are castable. 2052*0fca6ea1SDimitry Andric if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) { 2053*0fca6ea1SDimitry Andric return Builder.CreateBitOrPointerCast(V, DstVTy); 2054*0fca6ea1SDimitry Andric } 2055*0fca6ea1SDimitry Andric // V cannot be directly casted to desired vector type. 2056*0fca6ea1SDimitry Andric // May happen when V is a floating point vector but DstVTy is a vector of 2057*0fca6ea1SDimitry Andric // pointers or vice-versa. Handle this using a two-step bitcast using an 2058*0fca6ea1SDimitry Andric // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float. 2059*0fca6ea1SDimitry Andric assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) && 2060*0fca6ea1SDimitry Andric "Only one type should be a pointer type"); 2061*0fca6ea1SDimitry Andric assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) && 2062*0fca6ea1SDimitry Andric "Only one type should be a floating point type"); 2063*0fca6ea1SDimitry Andric Type *IntTy = 2064*0fca6ea1SDimitry Andric IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy)); 2065*0fca6ea1SDimitry Andric auto *VecIntTy = VectorType::get(IntTy, VF); 2066*0fca6ea1SDimitry Andric Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy); 2067*0fca6ea1SDimitry Andric return Builder.CreateBitOrPointerCast(CastVal, DstVTy); 2068*0fca6ea1SDimitry Andric } 2069*0fca6ea1SDimitry Andric 2070*0fca6ea1SDimitry Andric /// Return a vector containing interleaved elements from multiple 2071*0fca6ea1SDimitry Andric /// smaller input vectors. 2072*0fca6ea1SDimitry Andric static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals, 2073*0fca6ea1SDimitry Andric const Twine &Name) { 2074*0fca6ea1SDimitry Andric unsigned Factor = Vals.size(); 2075*0fca6ea1SDimitry Andric assert(Factor > 1 && "Tried to interleave invalid number of vectors"); 2076*0fca6ea1SDimitry Andric 2077*0fca6ea1SDimitry Andric VectorType *VecTy = cast<VectorType>(Vals[0]->getType()); 2078*0fca6ea1SDimitry Andric #ifndef NDEBUG 2079*0fca6ea1SDimitry Andric for (Value *Val : Vals) 2080*0fca6ea1SDimitry Andric assert(Val->getType() == VecTy && "Tried to interleave mismatched types"); 2081*0fca6ea1SDimitry Andric #endif 2082*0fca6ea1SDimitry Andric 2083*0fca6ea1SDimitry Andric // Scalable vectors cannot use arbitrary shufflevectors (only splats), so 2084*0fca6ea1SDimitry Andric // must use intrinsics to interleave. 2085*0fca6ea1SDimitry Andric if (VecTy->isScalableTy()) { 2086*0fca6ea1SDimitry Andric VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy); 2087*0fca6ea1SDimitry Andric return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2, 2088*0fca6ea1SDimitry Andric Vals, 2089*0fca6ea1SDimitry Andric /*FMFSource=*/nullptr, Name); 2090*0fca6ea1SDimitry Andric } 2091*0fca6ea1SDimitry Andric 2092*0fca6ea1SDimitry Andric // Fixed length. Start by concatenating all vectors into a wide vector. 2093*0fca6ea1SDimitry Andric Value *WideVec = concatenateVectors(Builder, Vals); 2094*0fca6ea1SDimitry Andric 2095*0fca6ea1SDimitry Andric // Interleave the elements into the wide vector. 2096*0fca6ea1SDimitry Andric const unsigned NumElts = VecTy->getElementCount().getFixedValue(); 2097*0fca6ea1SDimitry Andric return Builder.CreateShuffleVector( 2098*0fca6ea1SDimitry Andric WideVec, createInterleaveMask(NumElts, Factor), Name); 2099*0fca6ea1SDimitry Andric } 2100*0fca6ea1SDimitry Andric 2101*0fca6ea1SDimitry Andric // Try to vectorize the interleave group that \p Instr belongs to. 2102*0fca6ea1SDimitry Andric // 2103*0fca6ea1SDimitry Andric // E.g. Translate following interleaved load group (factor = 3): 2104*0fca6ea1SDimitry Andric // for (i = 0; i < N; i+=3) { 2105*0fca6ea1SDimitry Andric // R = Pic[i]; // Member of index 0 2106*0fca6ea1SDimitry Andric // G = Pic[i+1]; // Member of index 1 2107*0fca6ea1SDimitry Andric // B = Pic[i+2]; // Member of index 2 2108*0fca6ea1SDimitry Andric // ... // do something to R, G, B 2109*0fca6ea1SDimitry Andric // } 2110*0fca6ea1SDimitry Andric // To: 2111*0fca6ea1SDimitry Andric // %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B 2112*0fca6ea1SDimitry Andric // %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements 2113*0fca6ea1SDimitry Andric // %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements 2114*0fca6ea1SDimitry Andric // %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements 2115*0fca6ea1SDimitry Andric // 2116*0fca6ea1SDimitry Andric // Or translate following interleaved store group (factor = 3): 2117*0fca6ea1SDimitry Andric // for (i = 0; i < N; i+=3) { 2118*0fca6ea1SDimitry Andric // ... do something to R, G, B 2119*0fca6ea1SDimitry Andric // Pic[i] = R; // Member of index 0 2120*0fca6ea1SDimitry Andric // Pic[i+1] = G; // Member of index 1 2121*0fca6ea1SDimitry Andric // Pic[i+2] = B; // Member of index 2 2122*0fca6ea1SDimitry Andric // } 2123*0fca6ea1SDimitry Andric // To: 2124*0fca6ea1SDimitry Andric // %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7> 2125*0fca6ea1SDimitry Andric // %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u> 2126*0fca6ea1SDimitry Andric // %interleaved.vec = shuffle %R_G.vec, %B_U.vec, 2127*0fca6ea1SDimitry Andric // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements 2128*0fca6ea1SDimitry Andric // store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B 2129*0fca6ea1SDimitry Andric void VPInterleaveRecipe::execute(VPTransformState &State) { 2130*0fca6ea1SDimitry Andric assert(!State.Instance && "Interleave group being replicated."); 2131*0fca6ea1SDimitry Andric const InterleaveGroup<Instruction> *Group = IG; 2132*0fca6ea1SDimitry Andric Instruction *Instr = Group->getInsertPos(); 2133*0fca6ea1SDimitry Andric 2134*0fca6ea1SDimitry Andric // Prepare for the vector type of the interleaved load/store. 2135*0fca6ea1SDimitry Andric Type *ScalarTy = getLoadStoreType(Instr); 2136*0fca6ea1SDimitry Andric unsigned InterleaveFactor = Group->getFactor(); 2137*0fca6ea1SDimitry Andric auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor); 2138*0fca6ea1SDimitry Andric 2139*0fca6ea1SDimitry Andric // Prepare for the new pointers. 2140*0fca6ea1SDimitry Andric SmallVector<Value *, 2> AddrParts; 2141*0fca6ea1SDimitry Andric unsigned Index = Group->getIndex(Instr); 2142*0fca6ea1SDimitry Andric 2143*0fca6ea1SDimitry Andric // TODO: extend the masked interleaved-group support to reversed access. 2144*0fca6ea1SDimitry Andric VPValue *BlockInMask = getMask(); 2145*0fca6ea1SDimitry Andric assert((!BlockInMask || !Group->isReverse()) && 2146*0fca6ea1SDimitry Andric "Reversed masked interleave-group not supported."); 2147*0fca6ea1SDimitry Andric 2148*0fca6ea1SDimitry Andric Value *Idx; 2149*0fca6ea1SDimitry Andric // If the group is reverse, adjust the index to refer to the last vector lane 2150*0fca6ea1SDimitry Andric // instead of the first. We adjust the index from the first vector lane, 2151*0fca6ea1SDimitry Andric // rather than directly getting the pointer for lane VF - 1, because the 2152*0fca6ea1SDimitry Andric // pointer operand of the interleaved access is supposed to be uniform. For 2153*0fca6ea1SDimitry Andric // uniform instructions, we're only required to generate a value for the 2154*0fca6ea1SDimitry Andric // first vector lane in each unroll iteration. 2155*0fca6ea1SDimitry Andric if (Group->isReverse()) { 2156*0fca6ea1SDimitry Andric Value *RuntimeVF = 2157*0fca6ea1SDimitry Andric getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF); 2158*0fca6ea1SDimitry Andric Idx = State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1)); 2159*0fca6ea1SDimitry Andric Idx = State.Builder.CreateMul(Idx, 2160*0fca6ea1SDimitry Andric State.Builder.getInt32(Group->getFactor())); 2161*0fca6ea1SDimitry Andric Idx = State.Builder.CreateAdd(Idx, State.Builder.getInt32(Index)); 2162*0fca6ea1SDimitry Andric Idx = State.Builder.CreateNeg(Idx); 2163*0fca6ea1SDimitry Andric } else 2164*0fca6ea1SDimitry Andric Idx = State.Builder.getInt32(-Index); 2165*0fca6ea1SDimitry Andric 2166*0fca6ea1SDimitry Andric VPValue *Addr = getAddr(); 2167*0fca6ea1SDimitry Andric for (unsigned Part = 0; Part < State.UF; Part++) { 2168*0fca6ea1SDimitry Andric Value *AddrPart = State.get(Addr, VPIteration(Part, 0)); 2169*0fca6ea1SDimitry Andric if (auto *I = dyn_cast<Instruction>(AddrPart)) 2170*0fca6ea1SDimitry Andric State.setDebugLocFrom(I->getDebugLoc()); 2171*0fca6ea1SDimitry Andric 2172*0fca6ea1SDimitry Andric // Notice current instruction could be any index. Need to adjust the address 2173*0fca6ea1SDimitry Andric // to the member of index 0. 2174*0fca6ea1SDimitry Andric // 2175*0fca6ea1SDimitry Andric // E.g. a = A[i+1]; // Member of index 1 (Current instruction) 2176*0fca6ea1SDimitry Andric // b = A[i]; // Member of index 0 2177*0fca6ea1SDimitry Andric // Current pointer is pointed to A[i+1], adjust it to A[i]. 2178*0fca6ea1SDimitry Andric // 2179*0fca6ea1SDimitry Andric // E.g. A[i+1] = a; // Member of index 1 2180*0fca6ea1SDimitry Andric // A[i] = b; // Member of index 0 2181*0fca6ea1SDimitry Andric // A[i+2] = c; // Member of index 2 (Current instruction) 2182*0fca6ea1SDimitry Andric // Current pointer is pointed to A[i+2], adjust it to A[i]. 2183*0fca6ea1SDimitry Andric 2184*0fca6ea1SDimitry Andric bool InBounds = false; 2185*0fca6ea1SDimitry Andric if (auto *gep = dyn_cast<GetElementPtrInst>(AddrPart->stripPointerCasts())) 2186*0fca6ea1SDimitry Andric InBounds = gep->isInBounds(); 2187*0fca6ea1SDimitry Andric AddrPart = State.Builder.CreateGEP(ScalarTy, AddrPart, Idx, "", InBounds); 2188*0fca6ea1SDimitry Andric AddrParts.push_back(AddrPart); 2189*0fca6ea1SDimitry Andric } 2190*0fca6ea1SDimitry Andric 2191*0fca6ea1SDimitry Andric State.setDebugLocFrom(Instr->getDebugLoc()); 2192*0fca6ea1SDimitry Andric Value *PoisonVec = PoisonValue::get(VecTy); 2193*0fca6ea1SDimitry Andric 2194*0fca6ea1SDimitry Andric auto CreateGroupMask = [&BlockInMask, &State, &InterleaveFactor]( 2195*0fca6ea1SDimitry Andric unsigned Part, Value *MaskForGaps) -> Value * { 2196*0fca6ea1SDimitry Andric if (State.VF.isScalable()) { 2197*0fca6ea1SDimitry Andric assert(!MaskForGaps && "Interleaved groups with gaps are not supported."); 2198*0fca6ea1SDimitry Andric assert(InterleaveFactor == 2 && 2199*0fca6ea1SDimitry Andric "Unsupported deinterleave factor for scalable vectors"); 2200*0fca6ea1SDimitry Andric auto *BlockInMaskPart = State.get(BlockInMask, Part); 2201*0fca6ea1SDimitry Andric SmallVector<Value *, 2> Ops = {BlockInMaskPart, BlockInMaskPart}; 2202*0fca6ea1SDimitry Andric auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(), 2203*0fca6ea1SDimitry Andric State.VF.getKnownMinValue() * 2, true); 2204*0fca6ea1SDimitry Andric return State.Builder.CreateIntrinsic( 2205*0fca6ea1SDimitry Andric MaskTy, Intrinsic::vector_interleave2, Ops, 2206*0fca6ea1SDimitry Andric /*FMFSource=*/nullptr, "interleaved.mask"); 2207*0fca6ea1SDimitry Andric } 2208*0fca6ea1SDimitry Andric 2209*0fca6ea1SDimitry Andric if (!BlockInMask) 2210*0fca6ea1SDimitry Andric return MaskForGaps; 2211*0fca6ea1SDimitry Andric 2212*0fca6ea1SDimitry Andric Value *BlockInMaskPart = State.get(BlockInMask, Part); 2213*0fca6ea1SDimitry Andric Value *ShuffledMask = State.Builder.CreateShuffleVector( 2214*0fca6ea1SDimitry Andric BlockInMaskPart, 2215*0fca6ea1SDimitry Andric createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()), 2216*0fca6ea1SDimitry Andric "interleaved.mask"); 2217*0fca6ea1SDimitry Andric return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And, 2218*0fca6ea1SDimitry Andric ShuffledMask, MaskForGaps) 2219*0fca6ea1SDimitry Andric : ShuffledMask; 2220*0fca6ea1SDimitry Andric }; 2221*0fca6ea1SDimitry Andric 2222*0fca6ea1SDimitry Andric const DataLayout &DL = Instr->getDataLayout(); 2223*0fca6ea1SDimitry Andric // Vectorize the interleaved load group. 2224*0fca6ea1SDimitry Andric if (isa<LoadInst>(Instr)) { 2225*0fca6ea1SDimitry Andric Value *MaskForGaps = nullptr; 2226*0fca6ea1SDimitry Andric if (NeedsMaskForGaps) { 2227*0fca6ea1SDimitry Andric MaskForGaps = createBitMaskForGaps(State.Builder, 2228*0fca6ea1SDimitry Andric State.VF.getKnownMinValue(), *Group); 2229*0fca6ea1SDimitry Andric assert(MaskForGaps && "Mask for Gaps is required but it is null"); 2230*0fca6ea1SDimitry Andric } 2231*0fca6ea1SDimitry Andric 2232*0fca6ea1SDimitry Andric // For each unroll part, create a wide load for the group. 2233*0fca6ea1SDimitry Andric SmallVector<Value *, 2> NewLoads; 2234*0fca6ea1SDimitry Andric for (unsigned Part = 0; Part < State.UF; Part++) { 2235*0fca6ea1SDimitry Andric Instruction *NewLoad; 2236*0fca6ea1SDimitry Andric if (BlockInMask || MaskForGaps) { 2237*0fca6ea1SDimitry Andric Value *GroupMask = CreateGroupMask(Part, MaskForGaps); 2238*0fca6ea1SDimitry Andric NewLoad = State.Builder.CreateMaskedLoad(VecTy, AddrParts[Part], 2239*0fca6ea1SDimitry Andric Group->getAlign(), GroupMask, 2240*0fca6ea1SDimitry Andric PoisonVec, "wide.masked.vec"); 2241*0fca6ea1SDimitry Andric } else 2242*0fca6ea1SDimitry Andric NewLoad = State.Builder.CreateAlignedLoad( 2243*0fca6ea1SDimitry Andric VecTy, AddrParts[Part], Group->getAlign(), "wide.vec"); 2244*0fca6ea1SDimitry Andric Group->addMetadata(NewLoad); 2245*0fca6ea1SDimitry Andric NewLoads.push_back(NewLoad); 2246*0fca6ea1SDimitry Andric } 2247*0fca6ea1SDimitry Andric 2248*0fca6ea1SDimitry Andric ArrayRef<VPValue *> VPDefs = definedValues(); 2249*0fca6ea1SDimitry Andric const DataLayout &DL = State.CFG.PrevBB->getDataLayout(); 2250*0fca6ea1SDimitry Andric if (VecTy->isScalableTy()) { 2251*0fca6ea1SDimitry Andric assert(InterleaveFactor == 2 && 2252*0fca6ea1SDimitry Andric "Unsupported deinterleave factor for scalable vectors"); 2253*0fca6ea1SDimitry Andric 2254*0fca6ea1SDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) { 2255*0fca6ea1SDimitry Andric // Scalable vectors cannot use arbitrary shufflevectors (only splats), 2256*0fca6ea1SDimitry Andric // so must use intrinsics to deinterleave. 2257*0fca6ea1SDimitry Andric Value *DI = State.Builder.CreateIntrinsic( 2258*0fca6ea1SDimitry Andric Intrinsic::vector_deinterleave2, VecTy, NewLoads[Part], 2259*0fca6ea1SDimitry Andric /*FMFSource=*/nullptr, "strided.vec"); 2260*0fca6ea1SDimitry Andric unsigned J = 0; 2261*0fca6ea1SDimitry Andric for (unsigned I = 0; I < InterleaveFactor; ++I) { 2262*0fca6ea1SDimitry Andric Instruction *Member = Group->getMember(I); 2263*0fca6ea1SDimitry Andric 2264*0fca6ea1SDimitry Andric if (!Member) 2265*0fca6ea1SDimitry Andric continue; 2266*0fca6ea1SDimitry Andric 2267*0fca6ea1SDimitry Andric Value *StridedVec = State.Builder.CreateExtractValue(DI, I); 2268*0fca6ea1SDimitry Andric // If this member has different type, cast the result type. 2269*0fca6ea1SDimitry Andric if (Member->getType() != ScalarTy) { 2270*0fca6ea1SDimitry Andric VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF); 2271*0fca6ea1SDimitry Andric StridedVec = 2272*0fca6ea1SDimitry Andric createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL); 2273*0fca6ea1SDimitry Andric } 2274*0fca6ea1SDimitry Andric 2275*0fca6ea1SDimitry Andric if (Group->isReverse()) 2276*0fca6ea1SDimitry Andric StridedVec = 2277*0fca6ea1SDimitry Andric State.Builder.CreateVectorReverse(StridedVec, "reverse"); 2278*0fca6ea1SDimitry Andric 2279*0fca6ea1SDimitry Andric State.set(VPDefs[J], StridedVec, Part); 2280*0fca6ea1SDimitry Andric ++J; 2281*0fca6ea1SDimitry Andric } 2282*0fca6ea1SDimitry Andric } 2283*0fca6ea1SDimitry Andric 2284*0fca6ea1SDimitry Andric return; 2285*0fca6ea1SDimitry Andric } 2286*0fca6ea1SDimitry Andric 2287*0fca6ea1SDimitry Andric // For each member in the group, shuffle out the appropriate data from the 2288*0fca6ea1SDimitry Andric // wide loads. 2289*0fca6ea1SDimitry Andric unsigned J = 0; 2290*0fca6ea1SDimitry Andric for (unsigned I = 0; I < InterleaveFactor; ++I) { 2291*0fca6ea1SDimitry Andric Instruction *Member = Group->getMember(I); 2292*0fca6ea1SDimitry Andric 2293*0fca6ea1SDimitry Andric // Skip the gaps in the group. 2294*0fca6ea1SDimitry Andric if (!Member) 2295*0fca6ea1SDimitry Andric continue; 2296*0fca6ea1SDimitry Andric 2297*0fca6ea1SDimitry Andric auto StrideMask = 2298*0fca6ea1SDimitry Andric createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue()); 2299*0fca6ea1SDimitry Andric for (unsigned Part = 0; Part < State.UF; Part++) { 2300*0fca6ea1SDimitry Andric Value *StridedVec = State.Builder.CreateShuffleVector( 2301*0fca6ea1SDimitry Andric NewLoads[Part], StrideMask, "strided.vec"); 2302*0fca6ea1SDimitry Andric 2303*0fca6ea1SDimitry Andric // If this member has different type, cast the result type. 2304*0fca6ea1SDimitry Andric if (Member->getType() != ScalarTy) { 2305*0fca6ea1SDimitry Andric assert(!State.VF.isScalable() && "VF is assumed to be non scalable."); 2306*0fca6ea1SDimitry Andric VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF); 2307*0fca6ea1SDimitry Andric StridedVec = 2308*0fca6ea1SDimitry Andric createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL); 2309*0fca6ea1SDimitry Andric } 2310*0fca6ea1SDimitry Andric 2311*0fca6ea1SDimitry Andric if (Group->isReverse()) 2312*0fca6ea1SDimitry Andric StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse"); 2313*0fca6ea1SDimitry Andric 2314*0fca6ea1SDimitry Andric State.set(VPDefs[J], StridedVec, Part); 2315*0fca6ea1SDimitry Andric } 2316*0fca6ea1SDimitry Andric ++J; 2317*0fca6ea1SDimitry Andric } 2318*0fca6ea1SDimitry Andric return; 2319*0fca6ea1SDimitry Andric } 2320*0fca6ea1SDimitry Andric 2321*0fca6ea1SDimitry Andric // The sub vector type for current instruction. 2322*0fca6ea1SDimitry Andric auto *SubVT = VectorType::get(ScalarTy, State.VF); 2323*0fca6ea1SDimitry Andric 2324*0fca6ea1SDimitry Andric // Vectorize the interleaved store group. 2325*0fca6ea1SDimitry Andric Value *MaskForGaps = 2326*0fca6ea1SDimitry Andric createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group); 2327*0fca6ea1SDimitry Andric assert((!MaskForGaps || !State.VF.isScalable()) && 2328*0fca6ea1SDimitry Andric "masking gaps for scalable vectors is not yet supported."); 2329*0fca6ea1SDimitry Andric ArrayRef<VPValue *> StoredValues = getStoredValues(); 2330*0fca6ea1SDimitry Andric for (unsigned Part = 0; Part < State.UF; Part++) { 2331*0fca6ea1SDimitry Andric // Collect the stored vector from each member. 2332*0fca6ea1SDimitry Andric SmallVector<Value *, 4> StoredVecs; 2333*0fca6ea1SDimitry Andric unsigned StoredIdx = 0; 2334*0fca6ea1SDimitry Andric for (unsigned i = 0; i < InterleaveFactor; i++) { 2335*0fca6ea1SDimitry Andric assert((Group->getMember(i) || MaskForGaps) && 2336*0fca6ea1SDimitry Andric "Fail to get a member from an interleaved store group"); 2337*0fca6ea1SDimitry Andric Instruction *Member = Group->getMember(i); 2338*0fca6ea1SDimitry Andric 2339*0fca6ea1SDimitry Andric // Skip the gaps in the group. 2340*0fca6ea1SDimitry Andric if (!Member) { 2341*0fca6ea1SDimitry Andric Value *Undef = PoisonValue::get(SubVT); 2342*0fca6ea1SDimitry Andric StoredVecs.push_back(Undef); 2343*0fca6ea1SDimitry Andric continue; 2344*0fca6ea1SDimitry Andric } 2345*0fca6ea1SDimitry Andric 2346*0fca6ea1SDimitry Andric Value *StoredVec = State.get(StoredValues[StoredIdx], Part); 2347*0fca6ea1SDimitry Andric ++StoredIdx; 2348*0fca6ea1SDimitry Andric 2349*0fca6ea1SDimitry Andric if (Group->isReverse()) 2350*0fca6ea1SDimitry Andric StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse"); 2351*0fca6ea1SDimitry Andric 2352*0fca6ea1SDimitry Andric // If this member has different type, cast it to a unified type. 2353*0fca6ea1SDimitry Andric 2354*0fca6ea1SDimitry Andric if (StoredVec->getType() != SubVT) 2355*0fca6ea1SDimitry Andric StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL); 2356*0fca6ea1SDimitry Andric 2357*0fca6ea1SDimitry Andric StoredVecs.push_back(StoredVec); 2358*0fca6ea1SDimitry Andric } 2359*0fca6ea1SDimitry Andric 2360*0fca6ea1SDimitry Andric // Interleave all the smaller vectors into one wider vector. 2361*0fca6ea1SDimitry Andric Value *IVec = 2362*0fca6ea1SDimitry Andric interleaveVectors(State.Builder, StoredVecs, "interleaved.vec"); 2363*0fca6ea1SDimitry Andric Instruction *NewStoreInstr; 2364*0fca6ea1SDimitry Andric if (BlockInMask || MaskForGaps) { 2365*0fca6ea1SDimitry Andric Value *GroupMask = CreateGroupMask(Part, MaskForGaps); 2366*0fca6ea1SDimitry Andric NewStoreInstr = State.Builder.CreateMaskedStore( 2367*0fca6ea1SDimitry Andric IVec, AddrParts[Part], Group->getAlign(), GroupMask); 2368*0fca6ea1SDimitry Andric } else 2369*0fca6ea1SDimitry Andric NewStoreInstr = State.Builder.CreateAlignedStore(IVec, AddrParts[Part], 2370*0fca6ea1SDimitry Andric Group->getAlign()); 2371*0fca6ea1SDimitry Andric 2372*0fca6ea1SDimitry Andric Group->addMetadata(NewStoreInstr); 2373*0fca6ea1SDimitry Andric } 2374*0fca6ea1SDimitry Andric } 2375*0fca6ea1SDimitry Andric 2376*0fca6ea1SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2377*0fca6ea1SDimitry Andric void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent, 2378*0fca6ea1SDimitry Andric VPSlotTracker &SlotTracker) const { 2379*0fca6ea1SDimitry Andric O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at "; 2380*0fca6ea1SDimitry Andric IG->getInsertPos()->printAsOperand(O, false); 2381*0fca6ea1SDimitry Andric O << ", "; 2382*0fca6ea1SDimitry Andric getAddr()->printAsOperand(O, SlotTracker); 2383*0fca6ea1SDimitry Andric VPValue *Mask = getMask(); 2384*0fca6ea1SDimitry Andric if (Mask) { 2385*0fca6ea1SDimitry Andric O << ", "; 2386*0fca6ea1SDimitry Andric Mask->printAsOperand(O, SlotTracker); 2387*0fca6ea1SDimitry Andric } 2388*0fca6ea1SDimitry Andric 2389*0fca6ea1SDimitry Andric unsigned OpIdx = 0; 2390*0fca6ea1SDimitry Andric for (unsigned i = 0; i < IG->getFactor(); ++i) { 2391*0fca6ea1SDimitry Andric if (!IG->getMember(i)) 2392*0fca6ea1SDimitry Andric continue; 2393*0fca6ea1SDimitry Andric if (getNumStoreOperands() > 0) { 2394*0fca6ea1SDimitry Andric O << "\n" << Indent << " store "; 2395*0fca6ea1SDimitry Andric getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker); 2396*0fca6ea1SDimitry Andric O << " to index " << i; 2397*0fca6ea1SDimitry Andric } else { 2398*0fca6ea1SDimitry Andric O << "\n" << Indent << " "; 2399*0fca6ea1SDimitry Andric getVPValue(OpIdx)->printAsOperand(O, SlotTracker); 2400*0fca6ea1SDimitry Andric O << " = load from index " << i; 2401*0fca6ea1SDimitry Andric } 2402*0fca6ea1SDimitry Andric ++OpIdx; 2403*0fca6ea1SDimitry Andric } 2404*0fca6ea1SDimitry Andric } 240581ad6265SDimitry Andric #endif 240681ad6265SDimitry Andric 240781ad6265SDimitry Andric void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) { 240881ad6265SDimitry Andric Value *Start = getStartValue()->getLiveInIRValue(); 24095f757f3fSDimitry Andric PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index"); 24105f757f3fSDimitry Andric EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt()); 241181ad6265SDimitry Andric 241281ad6265SDimitry Andric BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 241381ad6265SDimitry Andric EntryPart->addIncoming(Start, VectorPH); 24145f757f3fSDimitry Andric EntryPart->setDebugLoc(getDebugLoc()); 241581ad6265SDimitry Andric for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) 2416*0fca6ea1SDimitry Andric State.set(this, EntryPart, Part, /*IsScalar*/ true); 241781ad6265SDimitry Andric } 241881ad6265SDimitry Andric 241981ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 242081ad6265SDimitry Andric void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, 242181ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 242281ad6265SDimitry Andric O << Indent << "EMIT "; 242381ad6265SDimitry Andric printAsOperand(O, SlotTracker); 242481ad6265SDimitry Andric O << " = CANONICAL-INDUCTION "; 24255f757f3fSDimitry Andric printOperands(O, SlotTracker); 242681ad6265SDimitry Andric } 242781ad6265SDimitry Andric #endif 242881ad6265SDimitry Andric 242906c3fb27SDimitry Andric bool VPCanonicalIVPHIRecipe::isCanonical( 2430*0fca6ea1SDimitry Andric InductionDescriptor::InductionKind Kind, VPValue *Start, 2431*0fca6ea1SDimitry Andric VPValue *Step) const { 2432*0fca6ea1SDimitry Andric // Must be an integer induction. 2433*0fca6ea1SDimitry Andric if (Kind != InductionDescriptor::IK_IntInduction) 2434bdd1243dSDimitry Andric return false; 243506c3fb27SDimitry Andric // Start must match the start value of this canonical induction. 243606c3fb27SDimitry Andric if (Start != getStartValue()) 2437bdd1243dSDimitry Andric return false; 2438bdd1243dSDimitry Andric 243906c3fb27SDimitry Andric // If the step is defined by a recipe, it is not a ConstantInt. 244006c3fb27SDimitry Andric if (Step->getDefiningRecipe()) 244106c3fb27SDimitry Andric return false; 244206c3fb27SDimitry Andric 244306c3fb27SDimitry Andric ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue()); 244406c3fb27SDimitry Andric return StepC && StepC->isOne(); 2445bdd1243dSDimitry Andric } 2446bdd1243dSDimitry Andric 2447*0fca6ea1SDimitry Andric bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) { 24486246ae0bSDimitry Andric return IsScalarAfterVectorization && 2449*0fca6ea1SDimitry Andric (!IsScalable || vputils::onlyFirstLaneUsed(this)); 245081ad6265SDimitry Andric } 245181ad6265SDimitry Andric 245281ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 245381ad6265SDimitry Andric void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, 245481ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 245581ad6265SDimitry Andric O << Indent << "EMIT "; 245681ad6265SDimitry Andric printAsOperand(O, SlotTracker); 245781ad6265SDimitry Andric O << " = WIDEN-POINTER-INDUCTION "; 245881ad6265SDimitry Andric getStartValue()->printAsOperand(O, SlotTracker); 245981ad6265SDimitry Andric O << ", " << *IndDesc.getStep(); 246081ad6265SDimitry Andric } 246181ad6265SDimitry Andric #endif 246281ad6265SDimitry Andric 246381ad6265SDimitry Andric void VPExpandSCEVRecipe::execute(VPTransformState &State) { 246481ad6265SDimitry Andric assert(!State.Instance && "cannot be used in per-lane"); 2465*0fca6ea1SDimitry Andric const DataLayout &DL = State.CFG.PrevBB->getDataLayout(); 246681ad6265SDimitry Andric SCEVExpander Exp(SE, DL, "induction"); 246781ad6265SDimitry Andric 246881ad6265SDimitry Andric Value *Res = Exp.expandCodeFor(Expr, Expr->getType(), 246981ad6265SDimitry Andric &*State.Builder.GetInsertPoint()); 247006c3fb27SDimitry Andric assert(!State.ExpandedSCEVs.contains(Expr) && 247106c3fb27SDimitry Andric "Same SCEV expanded multiple times"); 247206c3fb27SDimitry Andric State.ExpandedSCEVs[Expr] = Res; 247381ad6265SDimitry Andric for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) 247406c3fb27SDimitry Andric State.set(this, Res, {Part, 0}); 247581ad6265SDimitry Andric } 247681ad6265SDimitry Andric 247781ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 247881ad6265SDimitry Andric void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent, 247981ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 248081ad6265SDimitry Andric O << Indent << "EMIT "; 248181ad6265SDimitry Andric getVPSingleValue()->printAsOperand(O, SlotTracker); 248281ad6265SDimitry Andric O << " = EXPAND SCEV " << *Expr; 248381ad6265SDimitry Andric } 248481ad6265SDimitry Andric #endif 248581ad6265SDimitry Andric 248681ad6265SDimitry Andric void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) { 2487*0fca6ea1SDimitry Andric Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true); 248881ad6265SDimitry Andric Type *STy = CanonicalIV->getType(); 248981ad6265SDimitry Andric IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); 249081ad6265SDimitry Andric ElementCount VF = State.VF; 249181ad6265SDimitry Andric Value *VStart = VF.isScalar() 249281ad6265SDimitry Andric ? CanonicalIV 249381ad6265SDimitry Andric : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast"); 249481ad6265SDimitry Andric for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) { 249581ad6265SDimitry Andric Value *VStep = createStepForVF(Builder, STy, VF, Part); 249681ad6265SDimitry Andric if (VF.isVector()) { 249781ad6265SDimitry Andric VStep = Builder.CreateVectorSplat(VF, VStep); 249881ad6265SDimitry Andric VStep = 249981ad6265SDimitry Andric Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType())); 250081ad6265SDimitry Andric } 250181ad6265SDimitry Andric Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv"); 250281ad6265SDimitry Andric State.set(this, CanonicalVectorIV, Part); 250381ad6265SDimitry Andric } 250481ad6265SDimitry Andric } 250581ad6265SDimitry Andric 250681ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 250781ad6265SDimitry Andric void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent, 250881ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 250981ad6265SDimitry Andric O << Indent << "EMIT "; 251081ad6265SDimitry Andric printAsOperand(O, SlotTracker); 251181ad6265SDimitry Andric O << " = WIDEN-CANONICAL-INDUCTION "; 251281ad6265SDimitry Andric printOperands(O, SlotTracker); 251381ad6265SDimitry Andric } 251481ad6265SDimitry Andric #endif 251581ad6265SDimitry Andric 251681ad6265SDimitry Andric void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) { 251781ad6265SDimitry Andric auto &Builder = State.Builder; 251881ad6265SDimitry Andric // Create a vector from the initial value. 251981ad6265SDimitry Andric auto *VectorInit = getStartValue()->getLiveInIRValue(); 252081ad6265SDimitry Andric 252181ad6265SDimitry Andric Type *VecTy = State.VF.isScalar() 252281ad6265SDimitry Andric ? VectorInit->getType() 252381ad6265SDimitry Andric : VectorType::get(VectorInit->getType(), State.VF); 252481ad6265SDimitry Andric 252581ad6265SDimitry Andric BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 252681ad6265SDimitry Andric if (State.VF.isVector()) { 252781ad6265SDimitry Andric auto *IdxTy = Builder.getInt32Ty(); 252881ad6265SDimitry Andric auto *One = ConstantInt::get(IdxTy, 1); 252981ad6265SDimitry Andric IRBuilder<>::InsertPointGuard Guard(Builder); 253081ad6265SDimitry Andric Builder.SetInsertPoint(VectorPH->getTerminator()); 253181ad6265SDimitry Andric auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF); 253281ad6265SDimitry Andric auto *LastIdx = Builder.CreateSub(RuntimeVF, One); 253381ad6265SDimitry Andric VectorInit = Builder.CreateInsertElement( 253481ad6265SDimitry Andric PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init"); 253581ad6265SDimitry Andric } 253681ad6265SDimitry Andric 253781ad6265SDimitry Andric // Create a phi node for the new recurrence. 25385f757f3fSDimitry Andric PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur"); 25395f757f3fSDimitry Andric EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt()); 254081ad6265SDimitry Andric EntryPart->addIncoming(VectorInit, VectorPH); 254181ad6265SDimitry Andric State.set(this, EntryPart, 0); 254281ad6265SDimitry Andric } 254381ad6265SDimitry Andric 254481ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 254581ad6265SDimitry Andric void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent, 254681ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 254781ad6265SDimitry Andric O << Indent << "FIRST-ORDER-RECURRENCE-PHI "; 254881ad6265SDimitry Andric printAsOperand(O, SlotTracker); 254981ad6265SDimitry Andric O << " = phi "; 255081ad6265SDimitry Andric printOperands(O, SlotTracker); 255181ad6265SDimitry Andric } 255281ad6265SDimitry Andric #endif 255381ad6265SDimitry Andric 255481ad6265SDimitry Andric void VPReductionPHIRecipe::execute(VPTransformState &State) { 255581ad6265SDimitry Andric auto &Builder = State.Builder; 255681ad6265SDimitry Andric 25577a6dacacSDimitry Andric // Reductions do not have to start at zero. They can start with 25587a6dacacSDimitry Andric // any loop invariant values. 25597a6dacacSDimitry Andric VPValue *StartVPV = getStartValue(); 25607a6dacacSDimitry Andric Value *StartV = StartVPV->getLiveInIRValue(); 25617a6dacacSDimitry Andric 256281ad6265SDimitry Andric // In order to support recurrences we need to be able to vectorize Phi nodes. 256381ad6265SDimitry Andric // Phi nodes have cycles, so we need to vectorize them in two stages. This is 256481ad6265SDimitry Andric // stage #1: We create a new vector PHI node with no incoming edges. We'll use 256581ad6265SDimitry Andric // this value when we vectorize all of the instructions that use the PHI. 256681ad6265SDimitry Andric bool ScalarPHI = State.VF.isScalar() || IsInLoop; 25677a6dacacSDimitry Andric Type *VecTy = ScalarPHI ? StartV->getType() 25687a6dacacSDimitry Andric : VectorType::get(StartV->getType(), State.VF); 256981ad6265SDimitry Andric 257081ad6265SDimitry Andric BasicBlock *HeaderBB = State.CFG.PrevBB; 257181ad6265SDimitry Andric assert(State.CurrentVectorLoop->getHeader() == HeaderBB && 257281ad6265SDimitry Andric "recipe must be in the vector loop header"); 257381ad6265SDimitry Andric unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF; 257481ad6265SDimitry Andric for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { 25755f757f3fSDimitry Andric Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi"); 25765f757f3fSDimitry Andric EntryPart->insertBefore(HeaderBB->getFirstInsertionPt()); 2577*0fca6ea1SDimitry Andric State.set(this, EntryPart, Part, IsInLoop); 257881ad6265SDimitry Andric } 257981ad6265SDimitry Andric 258081ad6265SDimitry Andric BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 258181ad6265SDimitry Andric 258281ad6265SDimitry Andric Value *Iden = nullptr; 258381ad6265SDimitry Andric RecurKind RK = RdxDesc.getRecurrenceKind(); 258481ad6265SDimitry Andric if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) || 25855f757f3fSDimitry Andric RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) { 25865f757f3fSDimitry Andric // MinMax and AnyOf reductions have the start value as their identity. 258781ad6265SDimitry Andric if (ScalarPHI) { 258881ad6265SDimitry Andric Iden = StartV; 258981ad6265SDimitry Andric } else { 259081ad6265SDimitry Andric IRBuilderBase::InsertPointGuard IPBuilder(Builder); 259181ad6265SDimitry Andric Builder.SetInsertPoint(VectorPH->getTerminator()); 259281ad6265SDimitry Andric StartV = Iden = 259381ad6265SDimitry Andric Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident"); 259481ad6265SDimitry Andric } 259581ad6265SDimitry Andric } else { 259681ad6265SDimitry Andric Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(), 259781ad6265SDimitry Andric RdxDesc.getFastMathFlags()); 259881ad6265SDimitry Andric 259981ad6265SDimitry Andric if (!ScalarPHI) { 260081ad6265SDimitry Andric Iden = Builder.CreateVectorSplat(State.VF, Iden); 260181ad6265SDimitry Andric IRBuilderBase::InsertPointGuard IPBuilder(Builder); 260281ad6265SDimitry Andric Builder.SetInsertPoint(VectorPH->getTerminator()); 260381ad6265SDimitry Andric Constant *Zero = Builder.getInt32(0); 260481ad6265SDimitry Andric StartV = Builder.CreateInsertElement(Iden, StartV, Zero); 260581ad6265SDimitry Andric } 260681ad6265SDimitry Andric } 260781ad6265SDimitry Andric 260881ad6265SDimitry Andric for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { 2609*0fca6ea1SDimitry Andric Value *EntryPart = State.get(this, Part, IsInLoop); 261081ad6265SDimitry Andric // Make sure to add the reduction start value only to the 261181ad6265SDimitry Andric // first unroll part. 261281ad6265SDimitry Andric Value *StartVal = (Part == 0) ? StartV : Iden; 261381ad6265SDimitry Andric cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH); 261481ad6265SDimitry Andric } 261581ad6265SDimitry Andric } 261681ad6265SDimitry Andric 261781ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 261881ad6265SDimitry Andric void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent, 261981ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 262081ad6265SDimitry Andric O << Indent << "WIDEN-REDUCTION-PHI "; 262181ad6265SDimitry Andric 262281ad6265SDimitry Andric printAsOperand(O, SlotTracker); 262381ad6265SDimitry Andric O << " = phi "; 262481ad6265SDimitry Andric printOperands(O, SlotTracker); 262581ad6265SDimitry Andric } 262681ad6265SDimitry Andric #endif 262781ad6265SDimitry Andric 262881ad6265SDimitry Andric void VPWidenPHIRecipe::execute(VPTransformState &State) { 262981ad6265SDimitry Andric assert(EnableVPlanNativePath && 263081ad6265SDimitry Andric "Non-native vplans are not expected to have VPWidenPHIRecipes."); 263181ad6265SDimitry Andric 26325f757f3fSDimitry Andric Value *Op0 = State.get(getOperand(0), 0); 263381ad6265SDimitry Andric Type *VecTy = Op0->getType(); 263481ad6265SDimitry Andric Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi"); 263581ad6265SDimitry Andric State.set(this, VecPhi, 0); 263681ad6265SDimitry Andric } 263781ad6265SDimitry Andric 263881ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 263981ad6265SDimitry Andric void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, 264081ad6265SDimitry Andric VPSlotTracker &SlotTracker) const { 264181ad6265SDimitry Andric O << Indent << "WIDEN-PHI "; 264281ad6265SDimitry Andric 264381ad6265SDimitry Andric auto *OriginalPhi = cast<PHINode>(getUnderlyingValue()); 264481ad6265SDimitry Andric // Unless all incoming values are modeled in VPlan print the original PHI 264581ad6265SDimitry Andric // directly. 264681ad6265SDimitry Andric // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming 264781ad6265SDimitry Andric // values as VPValues. 264881ad6265SDimitry Andric if (getNumOperands() != OriginalPhi->getNumOperands()) { 264981ad6265SDimitry Andric O << VPlanIngredient(OriginalPhi); 265081ad6265SDimitry Andric return; 265181ad6265SDimitry Andric } 265281ad6265SDimitry Andric 265381ad6265SDimitry Andric printAsOperand(O, SlotTracker); 265481ad6265SDimitry Andric O << " = phi "; 265581ad6265SDimitry Andric printOperands(O, SlotTracker); 265681ad6265SDimitry Andric } 265781ad6265SDimitry Andric #endif 2658753f127fSDimitry Andric 2659753f127fSDimitry Andric // TODO: It would be good to use the existing VPWidenPHIRecipe instead and 2660753f127fSDimitry Andric // remove VPActiveLaneMaskPHIRecipe. 2661753f127fSDimitry Andric void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) { 2662753f127fSDimitry Andric BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 2663753f127fSDimitry Andric for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) { 2664753f127fSDimitry Andric Value *StartMask = State.get(getOperand(0), Part); 2665753f127fSDimitry Andric PHINode *EntryPart = 2666753f127fSDimitry Andric State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask"); 2667753f127fSDimitry Andric EntryPart->addIncoming(StartMask, VectorPH); 26685f757f3fSDimitry Andric EntryPart->setDebugLoc(getDebugLoc()); 2669753f127fSDimitry Andric State.set(this, EntryPart, Part); 2670753f127fSDimitry Andric } 2671753f127fSDimitry Andric } 2672753f127fSDimitry Andric 2673753f127fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2674753f127fSDimitry Andric void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent, 2675753f127fSDimitry Andric VPSlotTracker &SlotTracker) const { 2676753f127fSDimitry Andric O << Indent << "ACTIVE-LANE-MASK-PHI "; 2677753f127fSDimitry Andric 2678753f127fSDimitry Andric printAsOperand(O, SlotTracker); 2679753f127fSDimitry Andric O << " = phi "; 2680753f127fSDimitry Andric printOperands(O, SlotTracker); 2681753f127fSDimitry Andric } 2682753f127fSDimitry Andric #endif 2683*0fca6ea1SDimitry Andric 2684*0fca6ea1SDimitry Andric void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) { 2685*0fca6ea1SDimitry Andric BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 2686*0fca6ea1SDimitry Andric assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization."); 2687*0fca6ea1SDimitry Andric Value *Start = State.get(getOperand(0), VPIteration(0, 0)); 2688*0fca6ea1SDimitry Andric PHINode *EntryPart = 2689*0fca6ea1SDimitry Andric State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv"); 2690*0fca6ea1SDimitry Andric EntryPart->addIncoming(Start, VectorPH); 2691*0fca6ea1SDimitry Andric EntryPart->setDebugLoc(getDebugLoc()); 2692*0fca6ea1SDimitry Andric State.set(this, EntryPart, 0, /*IsScalar=*/true); 2693*0fca6ea1SDimitry Andric } 2694*0fca6ea1SDimitry Andric 2695*0fca6ea1SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2696*0fca6ea1SDimitry Andric void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, 2697*0fca6ea1SDimitry Andric VPSlotTracker &SlotTracker) const { 2698*0fca6ea1SDimitry Andric O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI "; 2699*0fca6ea1SDimitry Andric 2700*0fca6ea1SDimitry Andric printAsOperand(O, SlotTracker); 2701*0fca6ea1SDimitry Andric O << " = phi "; 2702*0fca6ea1SDimitry Andric printOperands(O, SlotTracker); 2703*0fca6ea1SDimitry Andric } 2704*0fca6ea1SDimitry Andric #endif 2705