1 //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains implementations for different VPlan recipes. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "VPlan.h" 15 #include "VPlanAnalysis.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/Twine.h" 19 #include "llvm/Analysis/IVDescriptors.h" 20 #include "llvm/IR/BasicBlock.h" 21 #include "llvm/IR/IRBuilder.h" 22 #include "llvm/IR/Instruction.h" 23 #include "llvm/IR/Instructions.h" 24 #include "llvm/IR/Type.h" 25 #include "llvm/IR/Value.h" 26 #include "llvm/Support/Casting.h" 27 #include "llvm/Support/CommandLine.h" 28 #include "llvm/Support/Debug.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 31 #include "llvm/Transforms/Utils/LoopUtils.h" 32 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 33 #include <cassert> 34 35 using namespace llvm; 36 37 using VectorParts = SmallVector<Value *, 2>; 38 39 namespace llvm { 40 extern cl::opt<bool> EnableVPlanNativePath; 41 } 42 43 #define LV_NAME "loop-vectorize" 44 #define DEBUG_TYPE LV_NAME 45 46 bool VPRecipeBase::mayWriteToMemory() const { 47 switch (getVPDefID()) { 48 case VPInterleaveSC: 49 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0; 50 case VPWidenStoreEVLSC: 51 case VPWidenStoreSC: 52 return true; 53 case VPReplicateSC: 54 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) 55 ->mayWriteToMemory(); 56 case VPWidenCallSC: 57 return !cast<VPWidenCallRecipe>(this) 58 ->getCalledScalarFunction() 59 ->onlyReadsMemory(); 60 case VPBranchOnMaskSC: 61 case VPScalarIVStepsSC: 62 case VPPredInstPHISC: 63 return false; 64 case VPBlendSC: 65 case VPReductionSC: 66 case VPWidenCanonicalIVSC: 67 case VPWidenCastSC: 68 case VPWidenGEPSC: 69 case VPWidenIntOrFpInductionSC: 70 case VPWidenLoadEVLSC: 71 case VPWidenLoadSC: 72 case VPWidenPHISC: 73 case VPWidenSC: 74 case VPWidenSelectSC: { 75 const Instruction *I = 76 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 77 (void)I; 78 assert((!I || !I->mayWriteToMemory()) && 79 "underlying instruction may write to memory"); 80 return false; 81 } 82 default: 83 return true; 84 } 85 } 86 87 bool VPRecipeBase::mayReadFromMemory() const { 88 switch (getVPDefID()) { 89 case VPWidenLoadEVLSC: 90 case VPWidenLoadSC: 91 return true; 92 case VPReplicateSC: 93 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) 94 ->mayReadFromMemory(); 95 case VPWidenCallSC: 96 return !cast<VPWidenCallRecipe>(this) 97 ->getCalledScalarFunction() 98 ->onlyWritesMemory(); 99 case VPBranchOnMaskSC: 100 case VPPredInstPHISC: 101 case VPScalarIVStepsSC: 102 case VPWidenStoreEVLSC: 103 case VPWidenStoreSC: 104 return false; 105 case VPBlendSC: 106 case VPReductionSC: 107 case VPWidenCanonicalIVSC: 108 case VPWidenCastSC: 109 case VPWidenGEPSC: 110 case VPWidenIntOrFpInductionSC: 111 case VPWidenPHISC: 112 case VPWidenSC: 113 case VPWidenSelectSC: { 114 const Instruction *I = 115 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 116 (void)I; 117 assert((!I || 
!I->mayReadFromMemory()) && 118 "underlying instruction may read from memory"); 119 return false; 120 } 121 default: 122 return true; 123 } 124 } 125 126 bool VPRecipeBase::mayHaveSideEffects() const { 127 switch (getVPDefID()) { 128 case VPDerivedIVSC: 129 case VPPredInstPHISC: 130 case VPScalarCastSC: 131 return false; 132 case VPInstructionSC: 133 switch (cast<VPInstruction>(this)->getOpcode()) { 134 case Instruction::Or: 135 case Instruction::ICmp: 136 case Instruction::Select: 137 case VPInstruction::Not: 138 case VPInstruction::CalculateTripCountMinusVF: 139 case VPInstruction::CanonicalIVIncrementForPart: 140 case VPInstruction::PtrAdd: 141 return false; 142 default: 143 return true; 144 } 145 case VPWidenCallSC: { 146 Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction(); 147 return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn(); 148 } 149 case VPBlendSC: 150 case VPReductionSC: 151 case VPScalarIVStepsSC: 152 case VPWidenCanonicalIVSC: 153 case VPWidenCastSC: 154 case VPWidenGEPSC: 155 case VPWidenIntOrFpInductionSC: 156 case VPWidenPHISC: 157 case VPWidenPointerInductionSC: 158 case VPWidenSC: 159 case VPWidenSelectSC: { 160 const Instruction *I = 161 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 162 (void)I; 163 assert((!I || !I->mayHaveSideEffects()) && 164 "underlying instruction has side-effects"); 165 return false; 166 } 167 case VPInterleaveSC: 168 return mayWriteToMemory(); 169 case VPWidenLoadEVLSC: 170 case VPWidenLoadSC: 171 case VPWidenStoreEVLSC: 172 case VPWidenStoreSC: 173 assert( 174 cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() == 175 mayWriteToMemory() && 176 "mayHaveSideffects result for ingredient differs from this " 177 "implementation"); 178 return mayWriteToMemory(); 179 case VPReplicateSC: { 180 auto *R = cast<VPReplicateRecipe>(this); 181 return R->getUnderlyingInstr()->mayHaveSideEffects(); 182 } 183 default: 184 return true; 185 } 186 } 187 188 void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { 189 auto Lane = VPLane::getLastLaneForVF(State.VF); 190 VPValue *ExitValue = getOperand(0); 191 if (vputils::isUniformAfterVectorization(ExitValue)) 192 Lane = VPLane::getFirstLane(); 193 VPBasicBlock *MiddleVPBB = 194 cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor()); 195 assert(MiddleVPBB->getNumSuccessors() == 0 && 196 "the middle block must not have any successors"); 197 BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB]; 198 Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)), 199 MiddleBB); 200 } 201 202 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 203 void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const { 204 O << "Live-out "; 205 getPhi()->printAsOperand(O); 206 O << " = "; 207 getOperand(0)->printAsOperand(O, SlotTracker); 208 O << "\n"; 209 } 210 #endif 211 212 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { 213 assert(!Parent && "Recipe already in some VPBasicBlock"); 214 assert(InsertPos->getParent() && 215 "Insertion position not in any VPBasicBlock"); 216 InsertPos->getParent()->insert(this, InsertPos->getIterator()); 217 } 218 219 void VPRecipeBase::insertBefore(VPBasicBlock &BB, 220 iplist<VPRecipeBase>::iterator I) { 221 assert(!Parent && "Recipe already in some VPBasicBlock"); 222 assert(I == BB.end() || I->getParent() == &BB); 223 BB.insert(this, I); 224 } 225 226 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) { 227 assert(!Parent && "Recipe already in some 
VPBasicBlock"); 228 assert(InsertPos->getParent() && 229 "Insertion position not in any VPBasicBlock"); 230 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator())); 231 } 232 233 void VPRecipeBase::removeFromParent() { 234 assert(getParent() && "Recipe not in any VPBasicBlock"); 235 getParent()->getRecipeList().remove(getIterator()); 236 Parent = nullptr; 237 } 238 239 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() { 240 assert(getParent() && "Recipe not in any VPBasicBlock"); 241 return getParent()->getRecipeList().erase(getIterator()); 242 } 243 244 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { 245 removeFromParent(); 246 insertAfter(InsertPos); 247 } 248 249 void VPRecipeBase::moveBefore(VPBasicBlock &BB, 250 iplist<VPRecipeBase>::iterator I) { 251 removeFromParent(); 252 insertBefore(BB, I); 253 } 254 255 FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const { 256 assert(OpType == OperationType::FPMathOp && 257 "recipe doesn't have fast math flags"); 258 FastMathFlags Res; 259 Res.setAllowReassoc(FMFs.AllowReassoc); 260 Res.setNoNaNs(FMFs.NoNaNs); 261 Res.setNoInfs(FMFs.NoInfs); 262 Res.setNoSignedZeros(FMFs.NoSignedZeros); 263 Res.setAllowReciprocal(FMFs.AllowReciprocal); 264 Res.setAllowContract(FMFs.AllowContract); 265 Res.setApproxFunc(FMFs.ApproxFunc); 266 return Res; 267 } 268 269 VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, 270 VPValue *A, VPValue *B, DebugLoc DL, 271 const Twine &Name) 272 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}), 273 Pred, DL), 274 Opcode(Opcode), Name(Name.str()) { 275 assert(Opcode == Instruction::ICmp && 276 "only ICmp predicates supported at the moment"); 277 } 278 279 VPInstruction::VPInstruction(unsigned Opcode, 280 std::initializer_list<VPValue *> Operands, 281 FastMathFlags FMFs, DebugLoc DL, const Twine &Name) 282 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL), 283 Opcode(Opcode), Name(Name.str()) { 284 // Make sure the VPInstruction is a floating-point operation. 
285 assert(isFPMathOp() && "this op can't take fast-math flags"); 286 } 287 288 bool VPInstruction::doesGeneratePerAllLanes() const { 289 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this); 290 } 291 292 bool VPInstruction::canGenerateScalarForFirstLane() const { 293 if (Instruction::isBinaryOp(getOpcode())) 294 return true; 295 296 switch (Opcode) { 297 case VPInstruction::BranchOnCond: 298 case VPInstruction::BranchOnCount: 299 case VPInstruction::CalculateTripCountMinusVF: 300 case VPInstruction::CanonicalIVIncrementForPart: 301 case VPInstruction::ComputeReductionResult: 302 case VPInstruction::PtrAdd: 303 case VPInstruction::ExplicitVectorLength: 304 return true; 305 default: 306 return false; 307 } 308 } 309 310 Value *VPInstruction::generatePerLane(VPTransformState &State, 311 const VPIteration &Lane) { 312 IRBuilderBase &Builder = State.Builder; 313 314 assert(getOpcode() == VPInstruction::PtrAdd && 315 "only PtrAdd opcodes are supported for now"); 316 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane), 317 State.get(getOperand(1), Lane), Name); 318 } 319 320 Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { 321 IRBuilderBase &Builder = State.Builder; 322 323 if (Instruction::isBinaryOp(getOpcode())) { 324 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); 325 if (Part != 0 && vputils::onlyFirstPartUsed(this)) 326 return State.get(this, 0, OnlyFirstLaneUsed); 327 328 Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed); 329 Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed); 330 auto *Res = 331 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name); 332 if (auto *I = dyn_cast<Instruction>(Res)) 333 setFlags(I); 334 return Res; 335 } 336 337 switch (getOpcode()) { 338 case VPInstruction::Not: { 339 Value *A = State.get(getOperand(0), Part); 340 return Builder.CreateNot(A, Name); 341 } 342 case Instruction::ICmp: { 343 Value *A = State.get(getOperand(0), Part); 344 Value *B = State.get(getOperand(1), Part); 345 return Builder.CreateCmp(getPredicate(), A, B, Name); 346 } 347 case Instruction::Select: { 348 Value *Cond = State.get(getOperand(0), Part); 349 Value *Op1 = State.get(getOperand(1), Part); 350 Value *Op2 = State.get(getOperand(2), Part); 351 return Builder.CreateSelect(Cond, Op1, Op2, Name); 352 } 353 case VPInstruction::ActiveLaneMask: { 354 // Get first lane of vector induction variable. 355 Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0)); 356 // Get the original loop tripcount. 357 Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0)); 358 359 // If this part of the active lane mask is scalar, generate the CMP directly 360 // to avoid unnecessary extracts. 361 if (State.VF.isScalar()) 362 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC, 363 Name); 364 365 auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); 366 auto *PredTy = VectorType::get(Int1Ty, State.VF); 367 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask, 368 {PredTy, ScalarTC->getType()}, 369 {VIVElem0, ScalarTC}, nullptr, Name); 370 } 371 case VPInstruction::FirstOrderRecurrenceSplice: { 372 // Generate code to combine the previous and current values in vector v3. 
373 // 374 // vector.ph: 375 // v_init = vector(..., ..., ..., a[-1]) 376 // br vector.body 377 // 378 // vector.body 379 // i = phi [0, vector.ph], [i+4, vector.body] 380 // v1 = phi [v_init, vector.ph], [v2, vector.body] 381 // v2 = a[i, i+1, i+2, i+3]; 382 // v3 = vector(v1(3), v2(0, 1, 2)) 383 384 // For the first part, use the recurrence phi (v1), otherwise v2. 385 auto *V1 = State.get(getOperand(0), 0); 386 Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1); 387 if (!PartMinus1->getType()->isVectorTy()) 388 return PartMinus1; 389 Value *V2 = State.get(getOperand(1), Part); 390 return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name); 391 } 392 case VPInstruction::CalculateTripCountMinusVF: { 393 if (Part != 0) 394 return State.get(this, 0, /*IsScalar*/ true); 395 396 Value *ScalarTC = State.get(getOperand(0), {0, 0}); 397 Value *Step = 398 createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF); 399 Value *Sub = Builder.CreateSub(ScalarTC, Step); 400 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step); 401 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0); 402 return Builder.CreateSelect(Cmp, Sub, Zero); 403 } 404 case VPInstruction::ExplicitVectorLength: { 405 // Compute EVL 406 auto GetEVL = [=](VPTransformState &State, Value *AVL) { 407 assert(AVL->getType()->isIntegerTy() && 408 "Requested vector length should be an integer."); 409 410 // TODO: Add support for MaxSafeDist for correct loop emission. 411 assert(State.VF.isScalable() && "Expected scalable vector factor."); 412 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue()); 413 414 Value *EVL = State.Builder.CreateIntrinsic( 415 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length, 416 {AVL, VFArg, State.Builder.getTrue()}); 417 return EVL; 418 }; 419 // TODO: Restructure this code with an explicit remainder loop, vsetvli can 420 // be outside of the main loop. 421 assert(Part == 0 && "No unrolling expected for predicated vectorization."); 422 // Compute VTC - IV as the AVL (requested vector length). 423 Value *Index = State.get(getOperand(0), VPIteration(0, 0)); 424 Value *TripCount = State.get(getOperand(1), VPIteration(0, 0)); 425 Value *AVL = State.Builder.CreateSub(TripCount, Index); 426 Value *EVL = GetEVL(State, AVL); 427 return EVL; 428 } 429 case VPInstruction::CanonicalIVIncrementForPart: { 430 auto *IV = State.get(getOperand(0), VPIteration(0, 0)); 431 if (Part == 0) 432 return IV; 433 434 // The canonical IV is incremented by the vectorization factor (num of SIMD 435 // elements) times the unroll part. 436 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part); 437 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(), 438 hasNoSignedWrap()); 439 } 440 case VPInstruction::BranchOnCond: { 441 if (Part != 0) 442 return nullptr; 443 444 Value *Cond = State.get(getOperand(0), VPIteration(Part, 0)); 445 VPRegionBlock *ParentRegion = getParent()->getParent(); 446 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock(); 447 448 // Replace the temporary unreachable terminator with a new conditional 449 // branch, hooking it up to backward destination for exiting blocks now and 450 // to forward destination(s) later when they are created. 
451 BranchInst *CondBr = 452 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr); 453 454 if (getParent()->isExiting()) 455 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]); 456 457 CondBr->setSuccessor(0, nullptr); 458 Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); 459 return CondBr; 460 } 461 case VPInstruction::BranchOnCount: { 462 if (Part != 0) 463 return nullptr; 464 // First create the compare. 465 Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true); 466 Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true); 467 Value *Cond = Builder.CreateICmpEQ(IV, TC); 468 469 // Now create the branch. 470 auto *Plan = getParent()->getPlan(); 471 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion(); 472 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock(); 473 474 // Replace the temporary unreachable terminator with a new conditional 475 // branch, hooking it up to backward destination (the header) now and to the 476 // forward destination (the exit/middle block) later when it is created. 477 // Note that CreateCondBr expects a valid BB as first argument, so we need 478 // to set it to nullptr later. 479 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), 480 State.CFG.VPBB2IRBB[Header]); 481 CondBr->setSuccessor(0, nullptr); 482 Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); 483 return CondBr; 484 } 485 case VPInstruction::ComputeReductionResult: { 486 if (Part != 0) 487 return State.get(this, 0, /*IsScalar*/ true); 488 489 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary 490 // and will be removed by breaking up the recipe further. 491 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0)); 492 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue()); 493 // Get its reduction variable descriptor. 494 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); 495 496 RecurKind RK = RdxDesc.getRecurrenceKind(); 497 498 VPValue *LoopExitingDef = getOperand(1); 499 Type *PhiTy = OrigPhi->getType(); 500 VectorParts RdxParts(State.UF); 501 for (unsigned Part = 0; Part < State.UF; ++Part) 502 RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop()); 503 504 // If the vector reduction can be performed in a smaller type, we truncate 505 // then extend the loop exit value to enable InstCombine to evaluate the 506 // entire expression in the smaller type. 507 // TODO: Handle this in truncateToMinBW. 508 if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { 509 Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF); 510 for (unsigned Part = 0; Part < State.UF; ++Part) 511 RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); 512 } 513 // Reduce all of the unrolled parts into a single vector. 514 Value *ReducedPartRdx = RdxParts[0]; 515 unsigned Op = RecurrenceDescriptor::getOpcode(RK); 516 if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) 517 Op = Instruction::Or; 518 519 if (PhiR->isOrdered()) { 520 ReducedPartRdx = RdxParts[State.UF - 1]; 521 } else { 522 // Floating-point operations should have some FMF to enable the reduction. 
523 IRBuilderBase::FastMathFlagGuard FMFG(Builder); 524 Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); 525 for (unsigned Part = 1; Part < State.UF; ++Part) { 526 Value *RdxPart = RdxParts[Part]; 527 if (Op != Instruction::ICmp && Op != Instruction::FCmp) 528 ReducedPartRdx = Builder.CreateBinOp( 529 (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx"); 530 else 531 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart); 532 } 533 } 534 535 // Create the reduction after the loop. Note that inloop reductions create 536 // the target reduction in the loop using a Reduction recipe. 537 if ((State.VF.isVector() || 538 RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) && 539 !PhiR->isInLoop()) { 540 ReducedPartRdx = 541 createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi); 542 // If the reduction can be performed in a smaller type, we need to extend 543 // the reduction to the wider type before we branch to the original loop. 544 if (PhiTy != RdxDesc.getRecurrenceType()) 545 ReducedPartRdx = RdxDesc.isSigned() 546 ? Builder.CreateSExt(ReducedPartRdx, PhiTy) 547 : Builder.CreateZExt(ReducedPartRdx, PhiTy); 548 } 549 550 // If there were stores of the reduction value to a uniform memory address 551 // inside the loop, create the final store here. 552 if (StoreInst *SI = RdxDesc.IntermediateStore) { 553 auto *NewSI = Builder.CreateAlignedStore( 554 ReducedPartRdx, SI->getPointerOperand(), SI->getAlign()); 555 propagateMetadata(NewSI, SI); 556 } 557 558 return ReducedPartRdx; 559 } 560 case VPInstruction::PtrAdd: { 561 assert(vputils::onlyFirstLaneUsed(this) && 562 "can only generate first lane for PtrAdd"); 563 Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true); 564 Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true); 565 return Builder.CreatePtrAdd(Ptr, Addend, Name); 566 } 567 default: 568 llvm_unreachable("Unsupported opcode for instruction"); 569 } 570 } 571 572 #if !defined(NDEBUG) 573 bool VPInstruction::isFPMathOp() const { 574 // Inspired by FPMathOperator::classof. Notable differences are that we don't 575 // support Call, PHI and Select opcodes here yet. 
576 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul || 577 Opcode == Instruction::FNeg || Opcode == Instruction::FSub || 578 Opcode == Instruction::FDiv || Opcode == Instruction::FRem || 579 Opcode == Instruction::FCmp || Opcode == Instruction::Select; 580 } 581 #endif 582 583 void VPInstruction::execute(VPTransformState &State) { 584 assert(!State.Instance && "VPInstruction executing an Instance"); 585 IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); 586 assert((hasFastMathFlags() == isFPMathOp() || 587 getOpcode() == Instruction::Select) && 588 "Recipe not a FPMathOp but has fast-math flags?"); 589 if (hasFastMathFlags()) 590 State.Builder.setFastMathFlags(getFastMathFlags()); 591 State.setDebugLocFrom(getDebugLoc()); 592 bool GeneratesPerFirstLaneOnly = 593 canGenerateScalarForFirstLane() && 594 (vputils::onlyFirstLaneUsed(this) || 595 getOpcode() == VPInstruction::ComputeReductionResult); 596 bool GeneratesPerAllLanes = doesGeneratePerAllLanes(); 597 for (unsigned Part = 0; Part < State.UF; ++Part) { 598 if (GeneratesPerAllLanes) { 599 for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue(); 600 Lane != NumLanes; ++Lane) { 601 Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane)); 602 assert(GeneratedValue && "generatePerLane must produce a value"); 603 State.set(this, GeneratedValue, VPIteration(Part, Lane)); 604 } 605 continue; 606 } 607 608 Value *GeneratedValue = generatePerPart(State, Part); 609 if (!hasResult()) 610 continue; 611 assert(GeneratedValue && "generatePerPart must produce a value"); 612 assert((GeneratedValue->getType()->isVectorTy() == 613 !GeneratesPerFirstLaneOnly || 614 State.VF.isScalar()) && 615 "scalar value but not only first lane defined"); 616 State.set(this, GeneratedValue, Part, 617 /*IsScalar*/ GeneratesPerFirstLaneOnly); 618 } 619 } 620 621 bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { 622 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); 623 if (Instruction::isBinaryOp(getOpcode())) 624 return vputils::onlyFirstLaneUsed(this); 625 626 switch (getOpcode()) { 627 default: 628 return false; 629 case Instruction::ICmp: 630 case VPInstruction::PtrAdd: 631 // TODO: Cover additional opcodes. 
632 return vputils::onlyFirstLaneUsed(this); 633 case VPInstruction::ActiveLaneMask: 634 case VPInstruction::ExplicitVectorLength: 635 case VPInstruction::CalculateTripCountMinusVF: 636 case VPInstruction::CanonicalIVIncrementForPart: 637 case VPInstruction::BranchOnCount: 638 return true; 639 }; 640 llvm_unreachable("switch should return"); 641 } 642 643 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 644 void VPInstruction::dump() const { 645 VPSlotTracker SlotTracker(getParent()->getPlan()); 646 print(dbgs(), "", SlotTracker); 647 } 648 649 void VPInstruction::print(raw_ostream &O, const Twine &Indent, 650 VPSlotTracker &SlotTracker) const { 651 O << Indent << "EMIT "; 652 653 if (hasResult()) { 654 printAsOperand(O, SlotTracker); 655 O << " = "; 656 } 657 658 switch (getOpcode()) { 659 case VPInstruction::Not: 660 O << "not"; 661 break; 662 case VPInstruction::SLPLoad: 663 O << "combined load"; 664 break; 665 case VPInstruction::SLPStore: 666 O << "combined store"; 667 break; 668 case VPInstruction::ActiveLaneMask: 669 O << "active lane mask"; 670 break; 671 case VPInstruction::ExplicitVectorLength: 672 O << "EXPLICIT-VECTOR-LENGTH"; 673 break; 674 case VPInstruction::FirstOrderRecurrenceSplice: 675 O << "first-order splice"; 676 break; 677 case VPInstruction::BranchOnCond: 678 O << "branch-on-cond"; 679 break; 680 case VPInstruction::CalculateTripCountMinusVF: 681 O << "TC > VF ? TC - VF : 0"; 682 break; 683 case VPInstruction::CanonicalIVIncrementForPart: 684 O << "VF * Part +"; 685 break; 686 case VPInstruction::BranchOnCount: 687 O << "branch-on-count"; 688 break; 689 case VPInstruction::ComputeReductionResult: 690 O << "compute-reduction-result"; 691 break; 692 case VPInstruction::PtrAdd: 693 O << "ptradd"; 694 break; 695 default: 696 O << Instruction::getOpcodeName(getOpcode()); 697 } 698 699 printFlags(O); 700 printOperands(O, SlotTracker); 701 702 if (auto DL = getDebugLoc()) { 703 O << ", !dbg "; 704 DL.print(O); 705 } 706 } 707 #endif 708 709 void VPWidenCallRecipe::execute(VPTransformState &State) { 710 assert(State.VF.isVector() && "not widening"); 711 Function *CalledScalarFn = getCalledScalarFunction(); 712 assert(!isDbgInfoIntrinsic(CalledScalarFn->getIntrinsicID()) && 713 "DbgInfoIntrinsic should have been dropped during VPlan construction"); 714 State.setDebugLocFrom(getDebugLoc()); 715 716 bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic; 717 FunctionType *VFTy = nullptr; 718 if (Variant) 719 VFTy = Variant->getFunctionType(); 720 for (unsigned Part = 0; Part < State.UF; ++Part) { 721 SmallVector<Type *, 2> TysForDecl; 722 // Add return type if intrinsic is overloaded on it. 723 if (UseIntrinsic && 724 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) 725 TysForDecl.push_back(VectorType::get( 726 CalledScalarFn->getReturnType()->getScalarType(), State.VF)); 727 SmallVector<Value *, 4> Args; 728 for (const auto &I : enumerate(arg_operands())) { 729 // Some intrinsics have a scalar argument - don't replace it with a 730 // vector. 731 Value *Arg; 732 if (UseIntrinsic && 733 isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index())) 734 Arg = State.get(I.value(), VPIteration(0, 0)); 735 // Some vectorized function variants may also take a scalar argument, 736 // e.g. linear parameters for pointers. This needs to be the scalar value 737 // from the start of the respective part when interleaving. 
738 else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy()) 739 Arg = State.get(I.value(), VPIteration(Part, 0)); 740 else 741 Arg = State.get(I.value(), Part); 742 if (UseIntrinsic && 743 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index())) 744 TysForDecl.push_back(Arg->getType()); 745 Args.push_back(Arg); 746 } 747 748 Function *VectorF; 749 if (UseIntrinsic) { 750 // Use vector version of the intrinsic. 751 Module *M = State.Builder.GetInsertBlock()->getModule(); 752 VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl); 753 assert(VectorF && "Can't retrieve vector intrinsic."); 754 } else { 755 #ifndef NDEBUG 756 assert(Variant != nullptr && "Can't create vector function."); 757 #endif 758 VectorF = Variant; 759 } 760 761 auto *CI = cast_or_null<CallInst>(getUnderlyingInstr()); 762 SmallVector<OperandBundleDef, 1> OpBundles; 763 if (CI) 764 CI->getOperandBundlesAsDefs(OpBundles); 765 766 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles); 767 768 if (isa<FPMathOperator>(V)) 769 V->copyFastMathFlags(CI); 770 771 if (!V->getType()->isVoidTy()) 772 State.set(this, V, Part); 773 State.addMetadata(V, CI); 774 } 775 } 776 777 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 778 void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent, 779 VPSlotTracker &SlotTracker) const { 780 O << Indent << "WIDEN-CALL "; 781 782 Function *CalledFn = getCalledScalarFunction(); 783 if (CalledFn->getReturnType()->isVoidTy()) 784 O << "void "; 785 else { 786 printAsOperand(O, SlotTracker); 787 O << " = "; 788 } 789 790 O << "call @" << CalledFn->getName() << "("; 791 interleaveComma(arg_operands(), O, [&O, &SlotTracker](VPValue *Op) { 792 Op->printAsOperand(O, SlotTracker); 793 }); 794 O << ")"; 795 796 if (VectorIntrinsicID) 797 O << " (using vector intrinsic)"; 798 else { 799 O << " (using library function"; 800 if (Variant->hasName()) 801 O << ": " << Variant->getName(); 802 O << ")"; 803 } 804 } 805 806 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, 807 VPSlotTracker &SlotTracker) const { 808 O << Indent << "WIDEN-SELECT "; 809 printAsOperand(O, SlotTracker); 810 O << " = select "; 811 getOperand(0)->printAsOperand(O, SlotTracker); 812 O << ", "; 813 getOperand(1)->printAsOperand(O, SlotTracker); 814 O << ", "; 815 getOperand(2)->printAsOperand(O, SlotTracker); 816 O << (isInvariantCond() ? " (condition is loop invariant)" : ""); 817 } 818 #endif 819 820 void VPWidenSelectRecipe::execute(VPTransformState &State) { 821 State.setDebugLocFrom(getDebugLoc()); 822 823 // The condition can be loop invariant but still defined inside the 824 // loop. This means that we can't just use the original 'cond' value. 825 // We have to take the 'vectorized' value and pick the first lane. 826 // Instcombine will make this a no-op. 827 auto *InvarCond = 828 isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr; 829 830 for (unsigned Part = 0; Part < State.UF; ++Part) { 831 Value *Cond = InvarCond ? 
InvarCond : State.get(getCond(), Part); 832 Value *Op0 = State.get(getOperand(1), Part); 833 Value *Op1 = State.get(getOperand(2), Part); 834 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1); 835 State.set(this, Sel, Part); 836 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue())); 837 } 838 } 839 840 VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy( 841 const FastMathFlags &FMF) { 842 AllowReassoc = FMF.allowReassoc(); 843 NoNaNs = FMF.noNaNs(); 844 NoInfs = FMF.noInfs(); 845 NoSignedZeros = FMF.noSignedZeros(); 846 AllowReciprocal = FMF.allowReciprocal(); 847 AllowContract = FMF.allowContract(); 848 ApproxFunc = FMF.approxFunc(); 849 } 850 851 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 852 void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const { 853 switch (OpType) { 854 case OperationType::Cmp: 855 O << " " << CmpInst::getPredicateName(getPredicate()); 856 break; 857 case OperationType::DisjointOp: 858 if (DisjointFlags.IsDisjoint) 859 O << " disjoint"; 860 break; 861 case OperationType::PossiblyExactOp: 862 if (ExactFlags.IsExact) 863 O << " exact"; 864 break; 865 case OperationType::OverflowingBinOp: 866 if (WrapFlags.HasNUW) 867 O << " nuw"; 868 if (WrapFlags.HasNSW) 869 O << " nsw"; 870 break; 871 case OperationType::FPMathOp: 872 getFastMathFlags().print(O); 873 break; 874 case OperationType::GEPOp: 875 if (GEPFlags.IsInBounds) 876 O << " inbounds"; 877 break; 878 case OperationType::NonNegOp: 879 if (NonNegFlags.NonNeg) 880 O << " nneg"; 881 break; 882 case OperationType::Other: 883 break; 884 } 885 if (getNumOperands() > 0) 886 O << " "; 887 } 888 #endif 889 890 void VPWidenRecipe::execute(VPTransformState &State) { 891 State.setDebugLocFrom(getDebugLoc()); 892 auto &Builder = State.Builder; 893 switch (Opcode) { 894 case Instruction::Call: 895 case Instruction::Br: 896 case Instruction::PHI: 897 case Instruction::GetElementPtr: 898 case Instruction::Select: 899 llvm_unreachable("This instruction is handled by a different recipe."); 900 case Instruction::UDiv: 901 case Instruction::SDiv: 902 case Instruction::SRem: 903 case Instruction::URem: 904 case Instruction::Add: 905 case Instruction::FAdd: 906 case Instruction::Sub: 907 case Instruction::FSub: 908 case Instruction::FNeg: 909 case Instruction::Mul: 910 case Instruction::FMul: 911 case Instruction::FDiv: 912 case Instruction::FRem: 913 case Instruction::Shl: 914 case Instruction::LShr: 915 case Instruction::AShr: 916 case Instruction::And: 917 case Instruction::Or: 918 case Instruction::Xor: { 919 // Just widen unops and binops. 920 for (unsigned Part = 0; Part < State.UF; ++Part) { 921 SmallVector<Value *, 2> Ops; 922 for (VPValue *VPOp : operands()) 923 Ops.push_back(State.get(VPOp, Part)); 924 925 Value *V = Builder.CreateNAryOp(Opcode, Ops); 926 927 if (auto *VecOp = dyn_cast<Instruction>(V)) 928 setFlags(VecOp); 929 930 // Use this vector value for all users of the original instruction. 931 State.set(this, V, Part); 932 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue())); 933 } 934 935 break; 936 } 937 case Instruction::Freeze: { 938 for (unsigned Part = 0; Part < State.UF; ++Part) { 939 Value *Op = State.get(getOperand(0), Part); 940 941 Value *Freeze = Builder.CreateFreeze(Op); 942 State.set(this, Freeze, Part); 943 } 944 break; 945 } 946 case Instruction::ICmp: 947 case Instruction::FCmp: { 948 // Widen compares. Generate vector compares. 
949 bool FCmp = Opcode == Instruction::FCmp; 950 for (unsigned Part = 0; Part < State.UF; ++Part) { 951 Value *A = State.get(getOperand(0), Part); 952 Value *B = State.get(getOperand(1), Part); 953 Value *C = nullptr; 954 if (FCmp) { 955 // Propagate fast math flags. 956 IRBuilder<>::FastMathFlagGuard FMFG(Builder); 957 if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue())) 958 Builder.setFastMathFlags(I->getFastMathFlags()); 959 C = Builder.CreateFCmp(getPredicate(), A, B); 960 } else { 961 C = Builder.CreateICmp(getPredicate(), A, B); 962 } 963 State.set(this, C, Part); 964 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue())); 965 } 966 967 break; 968 } 969 default: 970 // This instruction is not vectorized by simple widening. 971 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : " 972 << Instruction::getOpcodeName(Opcode)); 973 llvm_unreachable("Unhandled instruction!"); 974 } // end of switch. 975 976 #if !defined(NDEBUG) 977 // Verify that VPlan type inference results agree with the type of the 978 // generated values. 979 for (unsigned Part = 0; Part < State.UF; ++Part) { 980 assert(VectorType::get(State.TypeAnalysis.inferScalarType(this), 981 State.VF) == State.get(this, Part)->getType() && 982 "inferred type and type from generated instructions do not match"); 983 } 984 #endif 985 } 986 987 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 988 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, 989 VPSlotTracker &SlotTracker) const { 990 O << Indent << "WIDEN "; 991 printAsOperand(O, SlotTracker); 992 O << " = " << Instruction::getOpcodeName(Opcode); 993 printFlags(O); 994 printOperands(O, SlotTracker); 995 } 996 #endif 997 998 void VPWidenCastRecipe::execute(VPTransformState &State) { 999 State.setDebugLocFrom(getDebugLoc()); 1000 auto &Builder = State.Builder; 1001 /// Vectorize casts. 1002 assert(State.VF.isVector() && "Not vectorizing?"); 1003 Type *DestTy = VectorType::get(getResultType(), State.VF); 1004 VPValue *Op = getOperand(0); 1005 for (unsigned Part = 0; Part < State.UF; ++Part) { 1006 if (Part > 0 && Op->isLiveIn()) { 1007 // FIXME: Remove once explicit unrolling is implemented using VPlan. 1008 State.set(this, State.get(this, 0), Part); 1009 continue; 1010 } 1011 Value *A = State.get(Op, Part); 1012 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy); 1013 State.set(this, Cast, Part); 1014 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue())); 1015 } 1016 } 1017 1018 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1019 void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent, 1020 VPSlotTracker &SlotTracker) const { 1021 O << Indent << "WIDEN-CAST "; 1022 printAsOperand(O, SlotTracker); 1023 O << " = " << Instruction::getOpcodeName(Opcode) << " "; 1024 printFlags(O); 1025 printOperands(O, SlotTracker); 1026 O << " to " << *getResultType(); 1027 } 1028 #endif 1029 1030 /// This function adds 1031 /// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...) 1032 /// to each vector element of Val. The sequence starts at StartIndex. 1033 /// \p Opcode is relevant for FP induction variable. 1034 static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step, 1035 Instruction::BinaryOps BinOp, ElementCount VF, 1036 IRBuilderBase &Builder) { 1037 assert(VF.isVector() && "only vector VFs are supported"); 1038 1039 // Create and check the types. 
1040 auto *ValVTy = cast<VectorType>(Val->getType()); 1041 ElementCount VLen = ValVTy->getElementCount(); 1042 1043 Type *STy = Val->getType()->getScalarType(); 1044 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) && 1045 "Induction Step must be an integer or FP"); 1046 assert(Step->getType() == STy && "Step has wrong type"); 1047 1048 SmallVector<Constant *, 8> Indices; 1049 1050 // Create a vector of consecutive numbers from zero to VF. 1051 VectorType *InitVecValVTy = ValVTy; 1052 if (STy->isFloatingPointTy()) { 1053 Type *InitVecValSTy = 1054 IntegerType::get(STy->getContext(), STy->getScalarSizeInBits()); 1055 InitVecValVTy = VectorType::get(InitVecValSTy, VLen); 1056 } 1057 Value *InitVec = Builder.CreateStepVector(InitVecValVTy); 1058 1059 // Splat the StartIdx 1060 Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx); 1061 1062 if (STy->isIntegerTy()) { 1063 InitVec = Builder.CreateAdd(InitVec, StartIdxSplat); 1064 Step = Builder.CreateVectorSplat(VLen, Step); 1065 assert(Step->getType() == Val->getType() && "Invalid step vec"); 1066 // FIXME: The newly created binary instructions should contain nsw/nuw 1067 // flags, which can be found from the original scalar operations. 1068 Step = Builder.CreateMul(InitVec, Step); 1069 return Builder.CreateAdd(Val, Step, "induction"); 1070 } 1071 1072 // Floating point induction. 1073 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) && 1074 "Binary Opcode should be specified for FP induction"); 1075 InitVec = Builder.CreateUIToFP(InitVec, ValVTy); 1076 InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat); 1077 1078 Step = Builder.CreateVectorSplat(VLen, Step); 1079 Value *MulOp = Builder.CreateFMul(InitVec, Step); 1080 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction"); 1081 } 1082 1083 /// A helper function that returns an integer or floating-point constant with 1084 /// value C. 1085 static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) { 1086 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C) 1087 : ConstantFP::get(Ty, C); 1088 } 1089 1090 static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy, 1091 ElementCount VF) { 1092 assert(FTy->isFloatingPointTy() && "Expected floating point type!"); 1093 Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits()); 1094 Value *RuntimeVF = getRuntimeVF(B, IntTy, VF); 1095 return B.CreateUIToFP(RuntimeVF, FTy); 1096 } 1097 1098 void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { 1099 assert(!State.Instance && "Int or FP induction being replicated."); 1100 1101 Value *Start = getStartValue()->getLiveInIRValue(); 1102 const InductionDescriptor &ID = getInductionDescriptor(); 1103 TruncInst *Trunc = getTruncInst(); 1104 IRBuilderBase &Builder = State.Builder; 1105 assert(IV->getType() == ID.getStartValue()->getType() && "Types must match"); 1106 assert(State.VF.isVector() && "must have vector VF"); 1107 1108 // The value from the original loop to which we are mapping the new induction 1109 // variable. 1110 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV; 1111 1112 // Fast-math-flags propagate from the original induction instruction. 1113 IRBuilder<>::FastMathFlagGuard FMFG(Builder); 1114 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp())) 1115 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags()); 1116 1117 // Now do the actual transformations, and start with fetching the step value. 
1118 Value *Step = State.get(getStepValue(), VPIteration(0, 0)); 1119 1120 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) && 1121 "Expected either an induction phi-node or a truncate of it!"); 1122 1123 // Construct the initial value of the vector IV in the vector loop preheader 1124 auto CurrIP = Builder.saveIP(); 1125 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 1126 Builder.SetInsertPoint(VectorPH->getTerminator()); 1127 if (isa<TruncInst>(EntryVal)) { 1128 assert(Start->getType()->isIntegerTy() && 1129 "Truncation requires an integer type"); 1130 auto *TruncType = cast<IntegerType>(EntryVal->getType()); 1131 Step = Builder.CreateTrunc(Step, TruncType); 1132 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType); 1133 } 1134 1135 Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0); 1136 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start); 1137 Value *SteppedStart = getStepVector( 1138 SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder); 1139 1140 // We create vector phi nodes for both integer and floating-point induction 1141 // variables. Here, we determine the kind of arithmetic we will perform. 1142 Instruction::BinaryOps AddOp; 1143 Instruction::BinaryOps MulOp; 1144 if (Step->getType()->isIntegerTy()) { 1145 AddOp = Instruction::Add; 1146 MulOp = Instruction::Mul; 1147 } else { 1148 AddOp = ID.getInductionOpcode(); 1149 MulOp = Instruction::FMul; 1150 } 1151 1152 // Multiply the vectorization factor by the step using integer or 1153 // floating-point arithmetic as appropriate. 1154 Type *StepType = Step->getType(); 1155 Value *RuntimeVF; 1156 if (Step->getType()->isFloatingPointTy()) 1157 RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF); 1158 else 1159 RuntimeVF = getRuntimeVF(Builder, StepType, State.VF); 1160 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF); 1161 1162 // Create a vector splat to use in the induction update. 1163 // 1164 // FIXME: If the step is non-constant, we create the vector splat with 1165 // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't 1166 // handle a constant vector splat. 1167 Value *SplatVF = isa<Constant>(Mul) 1168 ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul)) 1169 : Builder.CreateVectorSplat(State.VF, Mul); 1170 Builder.restoreIP(CurrIP); 1171 1172 // We may need to add the step a number of times, depending on the unroll 1173 // factor. The last of those goes into the PHI. 1174 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind"); 1175 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt()); 1176 VecInd->setDebugLoc(EntryVal->getDebugLoc()); 1177 Instruction *LastInduction = VecInd; 1178 for (unsigned Part = 0; Part < State.UF; ++Part) { 1179 State.set(this, LastInduction, Part); 1180 1181 if (isa<TruncInst>(EntryVal)) 1182 State.addMetadata(LastInduction, EntryVal); 1183 1184 LastInduction = cast<Instruction>( 1185 Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")); 1186 LastInduction->setDebugLoc(EntryVal->getDebugLoc()); 1187 } 1188 1189 LastInduction->setName("vec.ind.next"); 1190 VecInd->addIncoming(SteppedStart, VectorPH); 1191 // Add induction update using an incorrect block temporarily. The phi node 1192 // will be fixed after VPlan execution. Note that at this point the latch 1193 // block cannot be used, as it does not exist yet. 1194 // TODO: Model increment value in VPlan, by turning the recipe into a 1195 // multi-def and a subclass of VPHeaderPHIRecipe. 
1196 VecInd->addIncoming(LastInduction, VectorPH); 1197 } 1198 1199 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1200 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, 1201 VPSlotTracker &SlotTracker) const { 1202 O << Indent << "WIDEN-INDUCTION"; 1203 if (getTruncInst()) { 1204 O << "\\l\""; 1205 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; 1206 O << " +\n" << Indent << "\" "; 1207 getVPValue(0)->printAsOperand(O, SlotTracker); 1208 } else 1209 O << " " << VPlanIngredient(IV); 1210 1211 O << ", "; 1212 getStepValue()->printAsOperand(O, SlotTracker); 1213 } 1214 #endif 1215 1216 bool VPWidenIntOrFpInductionRecipe::isCanonical() const { 1217 // The step may be defined by a recipe in the preheader (e.g. if it requires 1218 // SCEV expansion), but for the canonical induction the step is required to be 1219 // 1, which is represented as live-in. 1220 if (getStepValue()->getDefiningRecipe()) 1221 return false; 1222 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue()); 1223 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue()); 1224 auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin()); 1225 return StartC && StartC->isZero() && StepC && StepC->isOne() && 1226 getScalarType() == CanIV->getScalarType(); 1227 } 1228 1229 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1230 void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent, 1231 VPSlotTracker &SlotTracker) const { 1232 O << Indent; 1233 printAsOperand(O, SlotTracker); 1234 O << Indent << "= DERIVED-IV "; 1235 getStartValue()->printAsOperand(O, SlotTracker); 1236 O << " + "; 1237 getOperand(1)->printAsOperand(O, SlotTracker); 1238 O << " * "; 1239 getStepValue()->printAsOperand(O, SlotTracker); 1240 } 1241 #endif 1242 1243 void VPScalarIVStepsRecipe::execute(VPTransformState &State) { 1244 // Fast-math-flags propagate from the original induction instruction. 1245 IRBuilder<>::FastMathFlagGuard FMFG(State.Builder); 1246 if (hasFastMathFlags()) 1247 State.Builder.setFastMathFlags(getFastMathFlags()); 1248 1249 /// Compute scalar induction steps. \p ScalarIV is the scalar induction 1250 /// variable on which to base the steps, \p Step is the size of the step. 1251 1252 Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0)); 1253 Value *Step = State.get(getStepValue(), VPIteration(0, 0)); 1254 IRBuilderBase &Builder = State.Builder; 1255 1256 // Ensure step has the same type as that of scalar IV. 1257 Type *BaseIVTy = BaseIV->getType()->getScalarType(); 1258 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!"); 1259 1260 // We build scalar steps for both integer and floating-point induction 1261 // variables. Here, we determine the kind of arithmetic we will perform. 1262 Instruction::BinaryOps AddOp; 1263 Instruction::BinaryOps MulOp; 1264 if (BaseIVTy->isIntegerTy()) { 1265 AddOp = Instruction::Add; 1266 MulOp = Instruction::Mul; 1267 } else { 1268 AddOp = InductionOpcode; 1269 MulOp = Instruction::FMul; 1270 } 1271 1272 // Determine the number of scalars we need to generate for each unroll 1273 // iteration. 1274 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this); 1275 // Compute the scalar steps and save the results in State. 
1276 Type *IntStepTy = 1277 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits()); 1278 Type *VecIVTy = nullptr; 1279 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr; 1280 if (!FirstLaneOnly && State.VF.isScalable()) { 1281 VecIVTy = VectorType::get(BaseIVTy, State.VF); 1282 UnitStepVec = 1283 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF)); 1284 SplatStep = Builder.CreateVectorSplat(State.VF, Step); 1285 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV); 1286 } 1287 1288 unsigned StartPart = 0; 1289 unsigned EndPart = State.UF; 1290 unsigned StartLane = 0; 1291 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue(); 1292 if (State.Instance) { 1293 StartPart = State.Instance->Part; 1294 EndPart = StartPart + 1; 1295 StartLane = State.Instance->Lane.getKnownLane(); 1296 EndLane = StartLane + 1; 1297 } 1298 for (unsigned Part = StartPart; Part < EndPart; ++Part) { 1299 Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part); 1300 1301 if (!FirstLaneOnly && State.VF.isScalable()) { 1302 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0); 1303 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec); 1304 if (BaseIVTy->isFloatingPointTy()) 1305 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy); 1306 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep); 1307 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul); 1308 State.set(this, Add, Part); 1309 // It's useful to record the lane values too for the known minimum number 1310 // of elements so we do those below. This improves the code quality when 1311 // trying to extract the first element, for example. 1312 } 1313 1314 if (BaseIVTy->isFloatingPointTy()) 1315 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy); 1316 1317 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) { 1318 Value *StartIdx = Builder.CreateBinOp( 1319 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane)); 1320 // The step returned by `createStepForVF` is a runtime-evaluated value 1321 // when VF is scalable. Otherwise, it should be folded into a Constant. 1322 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) && 1323 "Expected StartIdx to be folded to a constant when VF is not " 1324 "scalable"); 1325 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step); 1326 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul); 1327 State.set(this, Add, VPIteration(Part, Lane)); 1328 } 1329 } 1330 } 1331 1332 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1333 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent, 1334 VPSlotTracker &SlotTracker) const { 1335 O << Indent; 1336 printAsOperand(O, SlotTracker); 1337 O << " = SCALAR-STEPS "; 1338 printOperands(O, SlotTracker); 1339 } 1340 #endif 1341 1342 void VPWidenGEPRecipe::execute(VPTransformState &State) { 1343 assert(State.VF.isVector() && "not widening"); 1344 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr()); 1345 // Construct a vector GEP by widening the operands of the scalar GEP as 1346 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP 1347 // results in a vector of pointers when at least one operand of the GEP 1348 // is vector-typed. Thus, to keep the representation compact, we only use 1349 // vector-typed operands for loop-varying values. 
1350 1351 if (areAllOperandsInvariant()) { 1352 // If we are vectorizing, but the GEP has only loop-invariant operands, 1353 // the GEP we build (by only using vector-typed operands for 1354 // loop-varying values) would be a scalar pointer. Thus, to ensure we 1355 // produce a vector of pointers, we need to either arbitrarily pick an 1356 // operand to broadcast, or broadcast a clone of the original GEP. 1357 // Here, we broadcast a clone of the original. 1358 // 1359 // TODO: If at some point we decide to scalarize instructions having 1360 // loop-invariant operands, this special case will no longer be 1361 // required. We would add the scalarization decision to 1362 // collectLoopScalars() and teach getVectorValue() to broadcast 1363 // the lane-zero scalar value. 1364 SmallVector<Value *> Ops; 1365 for (unsigned I = 0, E = getNumOperands(); I != E; I++) 1366 Ops.push_back(State.get(getOperand(I), VPIteration(0, 0))); 1367 1368 auto *NewGEP = 1369 State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0], 1370 ArrayRef(Ops).drop_front(), "", isInBounds()); 1371 for (unsigned Part = 0; Part < State.UF; ++Part) { 1372 Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP); 1373 State.set(this, EntryPart, Part); 1374 State.addMetadata(EntryPart, GEP); 1375 } 1376 } else { 1377 // If the GEP has at least one loop-varying operand, we are sure to 1378 // produce a vector of pointers. But if we are only unrolling, we want 1379 // to produce a scalar GEP for each unroll part. Thus, the GEP we 1380 // produce with the code below will be scalar (if VF == 1) or vector 1381 // (otherwise). Note that for the unroll-only case, we still maintain 1382 // values in the vector mapping with initVector, as we do for other 1383 // instructions. 1384 for (unsigned Part = 0; Part < State.UF; ++Part) { 1385 // The pointer operand of the new GEP. If it's loop-invariant, we 1386 // won't broadcast it. 1387 auto *Ptr = isPointerLoopInvariant() 1388 ? State.get(getOperand(0), VPIteration(0, 0)) 1389 : State.get(getOperand(0), Part); 1390 1391 // Collect all the indices for the new GEP. If any index is 1392 // loop-invariant, we won't broadcast it. 1393 SmallVector<Value *, 4> Indices; 1394 for (unsigned I = 1, E = getNumOperands(); I < E; I++) { 1395 VPValue *Operand = getOperand(I); 1396 if (isIndexLoopInvariant(I - 1)) 1397 Indices.push_back(State.get(Operand, VPIteration(0, 0))); 1398 else 1399 Indices.push_back(State.get(Operand, Part)); 1400 } 1401 1402 // Create the new GEP. Note that this GEP may be a scalar if VF == 1, 1403 // but it should be a vector, otherwise. 1404 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr, 1405 Indices, "", isInBounds()); 1406 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && 1407 "NewGEP is not a pointer vector"); 1408 State.set(this, NewGEP, Part); 1409 State.addMetadata(NewGEP, GEP); 1410 } 1411 } 1412 } 1413 1414 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1415 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, 1416 VPSlotTracker &SlotTracker) const { 1417 O << Indent << "WIDEN-GEP "; 1418 O << (isPointerLoopInvariant() ? "Inv" : "Var"); 1419 for (size_t I = 0; I < getNumOperands() - 1; ++I) 1420 O << "[" << (isIndexLoopInvariant(I) ? 
"Inv" : "Var") << "]"; 1421 1422 O << " "; 1423 printAsOperand(O, SlotTracker); 1424 O << " = getelementptr"; 1425 printFlags(O); 1426 printOperands(O, SlotTracker); 1427 } 1428 #endif 1429 1430 void VPVectorPointerRecipe ::execute(VPTransformState &State) { 1431 auto &Builder = State.Builder; 1432 State.setDebugLocFrom(getDebugLoc()); 1433 for (unsigned Part = 0; Part < State.UF; ++Part) { 1434 // Calculate the pointer for the specific unroll-part. 1435 Value *PartPtr = nullptr; 1436 // Use i32 for the gep index type when the value is constant, 1437 // or query DataLayout for a more suitable index type otherwise. 1438 const DataLayout &DL = 1439 Builder.GetInsertBlock()->getModule()->getDataLayout(); 1440 Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0) 1441 ? DL.getIndexType(IndexedTy->getPointerTo()) 1442 : Builder.getInt32Ty(); 1443 Value *Ptr = State.get(getOperand(0), VPIteration(0, 0)); 1444 bool InBounds = isInBounds(); 1445 if (IsReverse) { 1446 // If the address is consecutive but reversed, then the 1447 // wide store needs to start at the last vector element. 1448 // RunTimeVF = VScale * VF.getKnownMinValue() 1449 // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue() 1450 Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF); 1451 // NumElt = -Part * RunTimeVF 1452 Value *NumElt = Builder.CreateMul( 1453 ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF); 1454 // LastLane = 1 - RunTimeVF 1455 Value *LastLane = 1456 Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF); 1457 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds); 1458 PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds); 1459 } else { 1460 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part); 1461 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds); 1462 } 1463 1464 State.set(this, PartPtr, Part, /*IsScalar*/ true); 1465 } 1466 } 1467 1468 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1469 void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent, 1470 VPSlotTracker &SlotTracker) const { 1471 O << Indent; 1472 printAsOperand(O, SlotTracker); 1473 O << " = vector-pointer "; 1474 if (IsReverse) 1475 O << "(reverse) "; 1476 1477 printOperands(O, SlotTracker); 1478 } 1479 #endif 1480 1481 void VPBlendRecipe::execute(VPTransformState &State) { 1482 State.setDebugLocFrom(getDebugLoc()); 1483 // We know that all PHIs in non-header blocks are converted into 1484 // selects, so we don't have to worry about the insertion order and we 1485 // can just use the builder. 1486 // At this point we generate the predication tree. There may be 1487 // duplications since this is a simple recursive scan, but future 1488 // optimizations will clean it up. 1489 1490 unsigned NumIncoming = getNumIncomingValues(); 1491 1492 // Generate a sequence of selects of the form: 1493 // SELECT(Mask3, In3, 1494 // SELECT(Mask2, In2, 1495 // SELECT(Mask1, In1, 1496 // In0))) 1497 // Note that Mask0 is never used: lanes for which no path reaches this phi and 1498 // are essentially undef are taken from In0. 1499 VectorParts Entry(State.UF); 1500 for (unsigned In = 0; In < NumIncoming; ++In) { 1501 for (unsigned Part = 0; Part < State.UF; ++Part) { 1502 // We might have single edge PHIs (blocks) - use an identity 1503 // 'select' for the first PHI operand. 1504 Value *In0 = State.get(getIncomingValue(In), Part); 1505 if (In == 0) 1506 Entry[Part] = In0; // Initialize with the first incoming value. 
      else {
        // Select between the current value and the previous incoming edge
        // based on the incoming mask.
        Value *Cond = State.get(getMask(In), Part);
        Entry[Part] =
            State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
      }
    }
  }
  for (unsigned Part = 0; Part < State.UF; ++Part)
    State.set(this, Entry[Part], Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "BLEND ";
  printAsOperand(O, SlotTracker);
  O << " =";
  if (getNumIncomingValues() == 1) {
    // Not a User of any mask: not really blending, this is a
    // single-predecessor phi.
    O << " ";
    getIncomingValue(0)->printAsOperand(O, SlotTracker);
  } else {
    for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
      O << " ";
      getIncomingValue(I)->printAsOperand(O, SlotTracker);
      if (I == 0)
        continue;
      O << "/";
      getMask(I)->printAsOperand(O, SlotTracker);
    }
  }
}
#endif

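// Each unrolled part below reduces its vector operand to a scalar and folds
// it into the running chain value; when a condition operand is present, the
// masked-off lanes are first replaced with the neutral value of the
// reduction. Ordered (strict FP) reductions fold the lanes into the chain in
// order via createOrderedReduction.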
void VPReductionRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Reduction being replicated.");
  Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
  RecurKind Kind = RdxDesc.getRecurrenceKind();
  // Propagate the fast-math flags carried by the underlying instruction.
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *NewVecOp = State.get(getVecOp(), Part);
    if (VPValue *Cond = getCondOp()) {
      Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
      VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
      Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
      Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
                                                  RdxDesc.getFastMathFlags());
      if (State.VF.isVector()) {
        Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
      }

      Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
      NewVecOp = Select;
    }
    Value *NewRed;
    Value *NextInChain;
    if (IsOrdered) {
      if (State.VF.isVector())
        NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
                                        PrevInChain);
      else
        NewRed = State.Builder.CreateBinOp(
            (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
            NewVecOp);
      PrevInChain = NewRed;
    } else {
      PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
      NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
    }
    if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
      NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
                                   NewRed, PrevInChain);
    } else if (IsOrdered)
      NextInChain = NewRed;
    else
      NextInChain = State.Builder.CreateBinOp(
          (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
    State.set(this, NextInChain, Part, /*IsScalar*/ true);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "REDUCE ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  getChainOp()->printAsOperand(O, SlotTracker);
  O << " +";
  if (isa<FPMathOperator>(getUnderlyingInstr()))
    O << getUnderlyingInstr()->getFastMathFlags();
  O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
  getVecOp()->printAsOperand(O, SlotTracker);
  if (getCondOp()) {
    O << ", ";
    getCondOp()->printAsOperand(O, SlotTracker);
  }
  O << ")";
  if (RdxDesc.IntermediateStore)
    O << " (with final reduction value stored in invariant address sank "
         "outside of loop)";
}
#endif

bool VPReplicateRecipe::shouldPack() const {
  // Find if the recipe is used by a widened recipe via an intervening
  // VPPredInstPHIRecipe. In this case, also pack the scalar values in a
  // vector.
  return any_of(users(), [](const VPUser *U) {
    if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
      return any_of(PredR->users(), [PredR](const VPUser *U) {
        return !U->usesScalars(PredR);
      });
    return false;
  });
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
"CLONE " : "REPLICATE "); 1632 1633 if (!getUnderlyingInstr()->getType()->isVoidTy()) { 1634 printAsOperand(O, SlotTracker); 1635 O << " = "; 1636 } 1637 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) { 1638 O << "call"; 1639 printFlags(O); 1640 O << "@" << CB->getCalledFunction()->getName() << "("; 1641 interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)), 1642 O, [&O, &SlotTracker](VPValue *Op) { 1643 Op->printAsOperand(O, SlotTracker); 1644 }); 1645 O << ")"; 1646 } else { 1647 O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()); 1648 printFlags(O); 1649 printOperands(O, SlotTracker); 1650 } 1651 1652 if (shouldPack()) 1653 O << " (S->V)"; 1654 } 1655 #endif 1656 1657 /// Checks if \p C is uniform across all VFs and UFs. It is considered as such 1658 /// if it is either defined outside the vector region or its operand is known to 1659 /// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI). 1660 /// TODO: Uniformity should be associated with a VPValue and there should be a 1661 /// generic way to check. 1662 static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) { 1663 return C->isDefinedOutsideVectorRegions() || 1664 isa<VPDerivedIVRecipe>(C->getOperand(0)) || 1665 isa<VPCanonicalIVPHIRecipe>(C->getOperand(0)); 1666 } 1667 1668 Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) { 1669 assert(vputils::onlyFirstLaneUsed(this) && 1670 "Codegen only implemented for first lane."); 1671 switch (Opcode) { 1672 case Instruction::SExt: 1673 case Instruction::ZExt: 1674 case Instruction::Trunc: { 1675 // Note: SExt/ZExt not used yet. 1676 Value *Op = State.get(getOperand(0), VPIteration(Part, 0)); 1677 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy); 1678 } 1679 default: 1680 llvm_unreachable("opcode not implemented yet"); 1681 } 1682 } 1683 1684 void VPScalarCastRecipe ::execute(VPTransformState &State) { 1685 bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this); 1686 for (unsigned Part = 0; Part != State.UF; ++Part) { 1687 Value *Res; 1688 // Only generate a single instance, if the recipe is uniform across UFs and 1689 // VFs. 1690 if (Part > 0 && IsUniformAcrossVFsAndUFs) 1691 Res = State.get(this, VPIteration(0, 0)); 1692 else 1693 Res = generate(State, Part); 1694 State.set(this, Res, VPIteration(Part, 0)); 1695 } 1696 } 1697 1698 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1699 void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent, 1700 VPSlotTracker &SlotTracker) const { 1701 O << Indent << "SCALAR-CAST "; 1702 printAsOperand(O, SlotTracker); 1703 O << " = " << Instruction::getOpcodeName(Opcode) << " "; 1704 printOperands(O, SlotTracker); 1705 O << " to " << *ResultTy; 1706 } 1707 #endif 1708 1709 void VPBranchOnMaskRecipe::execute(VPTransformState &State) { 1710 assert(State.Instance && "Branch on Mask works only on single instance."); 1711 1712 unsigned Part = State.Instance->Part; 1713 unsigned Lane = State.Instance->Lane.getKnownLane(); 1714 1715 Value *ConditionBit = nullptr; 1716 VPValue *BlockInMask = getMask(); 1717 if (BlockInMask) { 1718 ConditionBit = State.get(BlockInMask, Part); 1719 if (ConditionBit->getType()->isVectorTy()) 1720 ConditionBit = State.Builder.CreateExtractElement( 1721 ConditionBit, State.Builder.getInt32(Lane)); 1722 } else // Block in mask is all-one. 
    ConditionBit = State.Builder.getTrue();

  // Replace the temporary unreachable terminator with a new conditional
  // branch, whose two destinations will be set later when they are created.
  auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
  assert(isa<UnreachableInst>(CurrentTerminator) &&
         "Expected to replace unreachable terminator with conditional branch.");
  auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
  CondBr->setSuccessor(0, nullptr);
  ReplaceInstWithInst(CurrentTerminator, CondBr);
}

void VPPredInstPHIRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Predicated instruction PHI works per instance.");
  Instruction *ScalarPredInst =
      cast<Instruction>(State.get(getOperand(0), *State.Instance));
  BasicBlock *PredicatedBB = ScalarPredInst->getParent();
  BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
  assert(PredicatingBB && "Predicated block has no single predecessor.");
  assert(isa<VPReplicateRecipe>(getOperand(0)) &&
         "operand must be VPReplicateRecipe");

  // By current pack/unpack logic we need to generate only a single phi node:
  // if a vector value for the predicated instruction exists at this point it
  // means the instruction has vector users only, and a phi for the vector
  // value is needed. In this case the recipe of the predicated instruction is
  // marked to also do that packing, thereby "hoisting" the insert-element
  // sequence. Otherwise, a phi node for the scalar value is needed.
  unsigned Part = State.Instance->Part;
  if (State.hasVectorValue(getOperand(0), Part)) {
    Value *VectorValue = State.get(getOperand(0), Part);
    InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
    PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
    VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
    VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
    if (State.hasVectorValue(this, Part))
      State.reset(this, VPhi, Part);
    else
      State.set(this, VPhi, Part);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), VPhi, Part);
  } else {
    Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
    PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
    Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
                     PredicatingBB);
    Phi->addIncoming(ScalarPredInst, PredicatedBB);
    if (State.hasScalarValue(this, *State.Instance))
      State.reset(this, Phi, *State.Instance);
    else
      State.set(this, Phi, *State.Instance);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), Phi, *State.Instance);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  printOperands(O, SlotTracker);
}

void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = load ";
  printOperands(O, SlotTracker);
}

void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = vp.load ";
  printOperands(O, SlotTracker);
}

void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN store ";
  printOperands(O, SlotTracker);
}

void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN vp.store ";
  printOperands(O, SlotTracker);
}
#endif

void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
  Value *Start = getStartValue()->getLiveInIRValue();
  PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
  EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  EntryPart->addIncoming(Start, VectorPH);
  EntryPart->setDebugLoc(getDebugLoc());
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, EntryPart, Part, /*IsScalar*/ true);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
#endif

bool VPCanonicalIVPHIRecipe::isCanonical(
    InductionDescriptor::InductionKind Kind, VPValue *Start,
    VPValue *Step) const {
  // Must be an integer induction.
  if (Kind != InductionDescriptor::IK_IntInduction)
    return false;
  // Start must match the start value of this canonical induction.
  if (Start != getStartValue())
    return false;

  // If the step is defined by a recipe, it is not a ConstantInt.
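  // A canonical induction steps by one, i.e. the step must be a live-in
  // ConstantInt with value 1, so any recipe-defined step cannot match.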
  if (Step->getDefiningRecipe())
    return false;

  ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
  return StepC && StepC->isOne();
}

bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
  return IsScalarAfterVectorization &&
         (!IsScalable || vputils::onlyFirstLaneUsed(this));
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-POINTER-INDUCTION ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << ", " << *IndDesc.getStep();
}
#endif

void VPExpandSCEVRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "cannot be used in per-lane");
  const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
  SCEVExpander Exp(SE, DL, "induction");

  Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
                                 &*State.Builder.GetInsertPoint());
  assert(!State.ExpandedSCEVs.contains(Expr) &&
         "Same SCEV expanded multiple times");
  State.ExpandedSCEVs[Expr] = Res;
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, Res, {Part, 0});
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  getVPSingleValue()->printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV " << *Expr;
}
#endif

void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
  Type *STy = CanonicalIV->getType();
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
  ElementCount VF = State.VF;
  Value *VStart = VF.isScalar()
                      ? CanonicalIV
                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    Value *VStep = createStepForVF(Builder, STy, VF, Part);
    if (VF.isVector()) {
      VStep = Builder.CreateVectorSplat(VF, VStep);
      VStep =
          Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
    }
    Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
    State.set(this, CanonicalVectorIV, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
#endif

void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  if (State.VF.isVector()) {
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(VectorPH->getTerminator());
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }

  // Create a phi node for the new recurrence.
  PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
  EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(VectorInit, VectorPH);
  State.set(this, EntryPart, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPReductionPHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;

  // Reductions do not have to start at zero. They can start with
  // any loop invariant values.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
  // this value when we vectorize all of the instructions that use the PHI.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy = ScalarPHI ? StartV->getType()
                          : VectorType::get(StartV->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
    EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part, IsInLoop);
  }

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
    // MinMax and AnyOf reductions have the start value as their identity.
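    // There is no separate neutral element to materialize; the (splatted)
    // start value is used as the identity for all unroll parts below.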
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }

  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part, IsInLoop);
    // Make sure to add the reduction start value only to the
    // first unroll part.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-REDUCTION-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPWidenPHIRecipe::execute(VPTransformState &State) {
  assert(EnableVPlanNativePath &&
         "Non-native vplans are not expected to have VPWidenPHIRecipes.");

  Value *Op0 = State.get(getOperand(0), 0);
  Type *VecTy = Op0->getType();
  Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
  State.set(this, VecPhi, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-PHI ";

  auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
  // Unless all incoming values are modeled in VPlan, print the original PHI
  // directly.
  // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant
  // incoming values as VPValues.
  if (getNumOperands() != OriginalPhi->getNumOperands()) {
    O << VPlanIngredient(OriginalPhi);
    return;
  }

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
// remove VPActiveLaneMaskPHIRecipe.
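// Creates one phi per unroll part in the vector loop header. Only the
// incoming value from the preheader (the start mask) is added here; the value
// coming back from the loop latch is filled in later, once the mask update
// for the next iteration has been created.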
void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    Value *StartMask = State.get(getOperand(0), Part);
    PHINode *EntryPart =
        State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
    EntryPart->addIncoming(StartMask, VectorPH);
    EntryPart->setDebugLoc(getDebugLoc());
    State.set(this, EntryPart, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                      VPSlotTracker &SlotTracker) const {
  O << Indent << "ACTIVE-LANE-MASK-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) {
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization.");
  Value *Start = State.get(getOperand(0), VPIteration(0, 0));
  PHINode *EntryPart =
      State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
  EntryPart->addIncoming(Start, VectorPH);
  EntryPart->setDebugLoc(getDebugLoc());
  State.set(this, EntryPart, 0, /*IsScalar=*/true);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif