//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementations for different VPlan recipes.
///
//===----------------------------------------------------------------------===//

#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>

using namespace llvm;

using VectorParts = SmallVector<Value *, 2>;

namespace llvm {
extern cl::opt<bool> EnableVPlanNativePath;
}

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPInterleaveSC:
    return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return true;
  case VPReplicateSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPWidenCallSC:
    return !cast<VPWidenCallRecipe>(this)
                ->getCalledScalarFunction()
                ->onlyReadsMemory();
  case VPBranchOnMaskSC:
  case VPScalarIVStepsSC:
  case VPPredInstPHISC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
    return true;
  case VPReplicateSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPWidenCallSC:
    return !cast<VPWidenCallRecipe>(this)
                ->getCalledScalarFunction()
                ->onlyWritesMemory();
  case VPBranchOnMaskSC:
  case VPPredInstPHISC:
  case VPScalarIVStepsSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPDerivedIVSC:
  case VPPredInstPHISC:
  case VPScalarCastSC:
    return false;
  case VPInstructionSC:
    switch (cast<VPInstruction>(this)->getOpcode()) {
    case Instruction::Or:
    case Instruction::ICmp:
    case Instruction::Select:
    case VPInstruction::Not:
    case VPInstruction::CalculateTripCountMinusVF:
    case VPInstruction::CanonicalIVIncrementForPart:
    case VPInstruction::LogicalAnd:
    case VPInstruction::PtrAdd:
      return false;
    default:
      return true;
    }
  case VPWidenCallSC: {
    Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
    return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
  }
  case VPBlendSC:
  case VPReductionSC:
  case VPScalarIVStepsSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenPointerInductionSC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPInterleaveSC:
    return mayWriteToMemory();
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    assert(
        cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
            mayWriteToMemory() &&
        "mayHaveSideEffects result for ingredient differs from this "
        "implementation");
    return mayWriteToMemory();
  case VPReplicateSC: {
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    return true;
  }
}

void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
  auto Lane = VPLane::getLastLaneForVF(State.VF);
  VPValue *ExitValue = getOperand(0);
  if (vputils::isUniformAfterVectorization(ExitValue))
    Lane = VPLane::getFirstLane();
  VPBasicBlock *MiddleVPBB =
      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
  assert(MiddleVPBB->getNumSuccessors() == 0 &&
         "the middle block must not have any successors");
  BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
  Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
                   MiddleBB);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
  O << "Live-out ";
  getPhi()->printAsOperand(O);
  O << " = ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << "\n";
}
#endif

void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, InsertPos->getIterator());
}

void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);
  BB.insert(this, I);
}

void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
}

void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
}

iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}

void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}

void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}

FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
  assert(OpType == OperationType::FPMathOp &&
         "recipe doesn't have fast math flags");
  FastMathFlags Res;
  Res.setAllowReassoc(FMFs.AllowReassoc);
  Res.setNoNaNs(FMFs.NoNaNs);
  Res.setNoInfs(FMFs.NoInfs);
  Res.setNoSignedZeros(FMFs.NoSignedZeros);
  Res.setAllowReciprocal(FMFs.AllowReciprocal);
  Res.setAllowContract(FMFs.AllowContract);
  Res.setApproxFunc(FMFs.ApproxFunc);
  return Res;
}

VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
                             VPValue *A, VPValue *B, DebugLoc DL,
                             const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
                          Pred, DL),
      Opcode(Opcode), Name(Name.str()) {
  assert(Opcode == Instruction::ICmp &&
         "only ICmp predicates supported at the moment");
}

VPInstruction::VPInstruction(unsigned Opcode,
                             std::initializer_list<VPValue *> Operands,
                             FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
      Opcode(Opcode), Name(Name.str()) {
  // Make sure the VPInstruction is a floating-point operation.
  assert(isFPMathOp() && "this op can't take fast-math flags");
}

bool VPInstruction::doesGeneratePerAllLanes() const {
  return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
}

bool VPInstruction::canGenerateScalarForFirstLane() const {
  if (Instruction::isBinaryOp(getOpcode()))
    return true;

  switch (Opcode) {
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::ComputeReductionResult:
  case VPInstruction::PtrAdd:
  case VPInstruction::ExplicitVectorLength:
    return true;
  default:
    return false;
  }
}

Value *VPInstruction::generatePerLane(VPTransformState &State,
                                      const VPIteration &Lane) {
  IRBuilderBase &Builder = State.Builder;

  assert(getOpcode() == VPInstruction::PtrAdd &&
         "only PtrAdd opcodes are supported for now");
  return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
                              State.get(getOperand(1), Lane), Name);
}

Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
  IRBuilderBase &Builder = State.Builder;

  if (Instruction::isBinaryOp(getOpcode())) {
    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
    if (Part != 0 && vputils::onlyFirstPartUsed(this))
      return State.get(this, 0, OnlyFirstLaneUsed);

    Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
    Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
    auto *Res =
        Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
    if (auto *I = dyn_cast<Instruction>(Res))
      setFlags(I);
    return Res;
  }

  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0), Part);
    return Builder.CreateNot(A, Name);
  }
  case Instruction::ICmp: {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    return Builder.CreateCmp(getPredicate(), A, B, Name);
  }
  case Instruction::Select: {
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    return Builder.CreateSelect(Cond, Op1, Op2, Name);
  }
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop tripcount.
    Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));

    // If this part of the active lane mask is scalar, generate the CMP directly
    // to avoid unnecessary extracts.
    if (State.VF.isScalar())
      return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
                               Name);

    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
                                   {PredTy, ScalarTC->getType()},
                                   {VIVElem0, ScalarTC}, nullptr, Name);
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))

    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy())
      return PartMinus1;
    Value *V2 = State.get(getOperand(1), Part);
    return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
  }
  case VPInstruction::CalculateTripCountMinusVF: {
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    Value *ScalarTC = State.get(getOperand(0), {0, 0});
    Value *Step =
        createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
    Value *Sub = Builder.CreateSub(ScalarTC, Step);
    Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
    Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
    return Builder.CreateSelect(Cmp, Sub, Zero);
  }
  case VPInstruction::ExplicitVectorLength: {
    // Compute EVL
    auto GetEVL = [=](VPTransformState &State, Value *AVL) {
      assert(AVL->getType()->isIntegerTy() &&
             "Requested vector length should be an integer.");

      // TODO: Add support for MaxSafeDist for correct loop emission.
      assert(State.VF.isScalable() && "Expected scalable vector factor.");
      Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());

      Value *EVL = State.Builder.CreateIntrinsic(
          State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
          {AVL, VFArg, State.Builder.getTrue()});
      return EVL;
    };
    // TODO: Restructure this code with an explicit remainder loop, vsetvli can
    // be outside of the main loop.
    assert(Part == 0 && "No unrolling expected for predicated vectorization.");
    // Compute VTC - IV as the AVL (requested vector length).
    Value *Index = State.get(getOperand(0), VPIteration(0, 0));
    Value *TripCount = State.get(getOperand(1), VPIteration(0, 0));
    Value *AVL = State.Builder.CreateSub(TripCount, Index);
    Value *EVL = GetEVL(State, AVL);
    return EVL;
  }
  case VPInstruction::CanonicalIVIncrementForPart: {
    auto *IV = State.get(getOperand(0), VPIteration(0, 0));
    if (Part == 0)
      return IV;

    // The canonical IV is incremented by the vectorization factor (num of SIMD
    // elements) times the unroll part.
    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
                             hasNoSignedWrap());
  }
  case VPInstruction::BranchOnCond: {
    if (Part != 0)
      return nullptr;

    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination for exiting blocks now and
    // to forward destination(s) later when they are created.
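    // Illustrative sketch only (the exact IR depends on the plan being
    // executed): for an exiting block the code below ends up as something like
    //   br i1 %cond, label <to-be-patched>, label %vector.body
    // with the forward successor filled in once the corresponding IR block
    // exists.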
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);

    if (getParent()->isExiting())
      CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);

    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
  case VPInstruction::BranchOnCount: {
    if (Part != 0)
      return nullptr;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
    Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination (the header) now and to the
    // forward destination (the exit/middle block) later when it is created.
    // Note that CreateCondBr expects a valid BB as first argument, so we need
    // to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
  case VPInstruction::ComputeReductionResult: {
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
    // and will be removed by breaking up the recipe further.
    auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
    auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
    // Get its reduction variable descriptor.
    const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();

    RecurKind RK = RdxDesc.getRecurrenceKind();

    VPValue *LoopExitingDef = getOperand(1);
    Type *PhiTy = OrigPhi->getType();
    VectorParts RdxParts(State.UF);
    for (unsigned Part = 0; Part < State.UF; ++Part)
      RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());

    // If the vector reduction can be performed in a smaller type, we truncate
    // then extend the loop exit value to enable InstCombine to evaluate the
    // entire expression in the smaller type.
    // TODO: Handle this in truncateToMinBW.
    if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
      Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
      for (unsigned Part = 0; Part < State.UF; ++Part)
        RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
    }
    // Reduce all of the unrolled parts into a single vector.
    Value *ReducedPartRdx = RdxParts[0];
    unsigned Op = RecurrenceDescriptor::getOpcode(RK);
    if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
      Op = Instruction::Or;

    if (PhiR->isOrdered()) {
      ReducedPartRdx = RdxParts[State.UF - 1];
    } else {
      // Floating-point operations should have some FMF to enable the reduction.
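      // Note (illustrative): the FMFs from the recurrence descriptor, which
      // typically include 'reassoc' for FP reductions, are applied below
      // because combining the per-part values into one vector reorders the
      // original scalar operations. Rough shape for UF=2:
      //   %bin.rdx = fadd reassoc <VF x float> %rdx.part1, %rdx.part0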
      IRBuilderBase::FastMathFlagGuard FMFG(Builder);
      Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
      for (unsigned Part = 1; Part < State.UF; ++Part) {
        Value *RdxPart = RdxParts[Part];
        if (Op != Instruction::ICmp && Op != Instruction::FCmp)
          ReducedPartRdx = Builder.CreateBinOp(
              (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
        else
          ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
      }
    }

    // Create the reduction after the loop. Note that inloop reductions create
    // the target reduction in the loop using a Reduction recipe.
    if ((State.VF.isVector() ||
         RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
        !PhiR->isInLoop()) {
      ReducedPartRdx =
          createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
      // If the reduction can be performed in a smaller type, we need to extend
      // the reduction to the wider type before we branch to the original loop.
      if (PhiTy != RdxDesc.getRecurrenceType())
        ReducedPartRdx = RdxDesc.isSigned()
                             ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
                             : Builder.CreateZExt(ReducedPartRdx, PhiTy);
    }

    // If there were stores of the reduction value to a uniform memory address
    // inside the loop, create the final store here.
    if (StoreInst *SI = RdxDesc.IntermediateStore) {
      auto *NewSI = Builder.CreateAlignedStore(
          ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
      propagateMetadata(NewSI, SI);
    }

    return ReducedPartRdx;
  }
  case VPInstruction::LogicalAnd: {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    return Builder.CreateLogicalAnd(A, B, Name);
  }
  case VPInstruction::PtrAdd: {
    assert(vputils::onlyFirstLaneUsed(this) &&
           "can only generate first lane for PtrAdd");
    Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
    Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
    return Builder.CreatePtrAdd(Ptr, Addend, Name);
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}

#if !defined(NDEBUG)
bool VPInstruction::isFPMathOp() const {
  // Inspired by FPMathOperator::classof. Notable differences are that we don't
  // support Call and PHI opcodes here yet.
  return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
         Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
         Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
         Opcode == Instruction::FCmp || Opcode == Instruction::Select;
}
#endif

void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Instance && "VPInstruction executing an Instance");
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  assert((hasFastMathFlags() == isFPMathOp() ||
          getOpcode() == Instruction::Select) &&
         "Recipe not a FPMathOp but has fast-math flags?");
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());
  State.setDebugLocFrom(getDebugLoc());
  bool GeneratesPerFirstLaneOnly =
      canGenerateScalarForFirstLane() &&
      (vputils::onlyFirstLaneUsed(this) ||
       getOpcode() == VPInstruction::ComputeReductionResult);
  bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    if (GeneratesPerAllLanes) {
      for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
           Lane != NumLanes; ++Lane) {
        Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
        assert(GeneratedValue && "generatePerLane must produce a value");
        State.set(this, GeneratedValue, VPIteration(Part, Lane));
      }
      continue;
    }

    Value *GeneratedValue = generatePerPart(State, Part);
    if (!hasResult())
      continue;
    assert(GeneratedValue && "generatePerPart must produce a value");
    assert((GeneratedValue->getType()->isVectorTy() ==
                !GeneratesPerFirstLaneOnly ||
            State.VF.isScalar()) &&
           "scalar value but not only first lane defined");
    State.set(this, GeneratedValue, Part,
              /*IsScalar*/ GeneratesPerFirstLaneOnly);
  }
}

bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  if (Instruction::isBinaryOp(getOpcode()))
    return vputils::onlyFirstLaneUsed(this);

  switch (getOpcode()) {
  default:
    return false;
  case Instruction::ICmp:
  case VPInstruction::PtrAdd:
    // TODO: Cover additional opcodes.
    return vputils::onlyFirstLaneUsed(this);
  case VPInstruction::ActiveLaneMask:
  case VPInstruction::ExplicitVectorLength:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::BranchOnCount:
    return true;
  };
  llvm_unreachable("switch should return");
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
  print(dbgs(), "", SlotTracker);
}

void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::ExplicitVectorLength:
    O << "EXPLICIT-VECTOR-LENGTH";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::CalculateTripCountMinusVF:
    O << "TC > VF ? TC - VF : 0";
    break;
  case VPInstruction::CanonicalIVIncrementForPart:
    O << "VF * Part +";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count";
    break;
  case VPInstruction::ComputeReductionResult:
    O << "compute-reduction-result";
    break;
  case VPInstruction::LogicalAnd:
    O << "logical-and";
    break;
  case VPInstruction::PtrAdd:
    O << "ptradd";
    break;
  default:
    O << Instruction::getOpcodeName(getOpcode());
  }

  printFlags(O);
  printOperands(O, SlotTracker);

  if (auto DL = getDebugLoc()) {
    O << ", !dbg ";
    DL.print(O);
  }
}
#endif

void VPWidenCallRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  Function *CalledScalarFn = getCalledScalarFunction();
  assert(!isDbgInfoIntrinsic(CalledScalarFn->getIntrinsicID()) &&
         "DbgInfoIntrinsic should have been dropped during VPlan construction");
  State.setDebugLocFrom(getDebugLoc());

  bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
  FunctionType *VFTy = nullptr;
  if (Variant)
    VFTy = Variant->getFunctionType();
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    SmallVector<Type *, 2> TysForDecl;
    // Add return type if intrinsic is overloaded on it.
    if (UseIntrinsic &&
        isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
      TysForDecl.push_back(VectorType::get(
          CalledScalarFn->getReturnType()->getScalarType(), State.VF));
    SmallVector<Value *, 4> Args;
    for (const auto &I : enumerate(arg_operands())) {
      // Some intrinsics have a scalar argument - don't replace it with a
      // vector.
      Value *Arg;
      if (UseIntrinsic &&
          isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
        Arg = State.get(I.value(), VPIteration(0, 0));
      // Some vectorized function variants may also take a scalar argument,
      // e.g. linear parameters for pointers. This needs to be the scalar value
      // from the start of the respective part when interleaving.
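      // (Illustrative example, not tied to a specific vector ABI: a vector
      // variant declared with a linear pointer parameter keeps that parameter
      // scalar, so lane 0 of the current part is passed instead of a widened
      // vector of pointers.)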
      else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
        Arg = State.get(I.value(), VPIteration(Part, 0));
      else
        Arg = State.get(I.value(), Part);
      if (UseIntrinsic &&
          isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
        TysForDecl.push_back(Arg->getType());
      Args.push_back(Arg);
    }

    Function *VectorF;
    if (UseIntrinsic) {
      // Use vector version of the intrinsic.
      Module *M = State.Builder.GetInsertBlock()->getModule();
      VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
      assert(VectorF && "Can't retrieve vector intrinsic.");
    } else {
#ifndef NDEBUG
      assert(Variant != nullptr && "Can't create vector function.");
#endif
      VectorF = Variant;
    }

    auto *CI = cast_or_null<CallInst>(getUnderlyingInstr());
    SmallVector<OperandBundleDef, 1> OpBundles;
    if (CI)
      CI->getOperandBundlesAsDefs(OpBundles);

    CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);

    if (isa<FPMathOperator>(V))
      V->copyFastMathFlags(CI);

    if (!V->getType()->isVoidTy())
      State.set(this, V, Part);
    State.addMetadata(V, CI);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  Function *CalledFn = getCalledScalarFunction();
  if (CalledFn->getReturnType()->isVoidTy())
    O << "void ";
  else {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  O << "call @" << CalledFn->getName() << "(";
  interleaveComma(arg_operands(), O, [&O, &SlotTracker](VPValue *Op) {
    Op->printAsOperand(O, SlotTracker);
  });
  O << ")";

  if (VectorIntrinsicID)
    O << " (using vector intrinsic)";
  else {
    O << " (using library function";
    if (Variant->hasName())
      O << ": " << Variant->getName();
    O << ")";
  }
}

void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-SELECT ";
  printAsOperand(O, SlotTracker);
  O << " = select ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(2)->printAsOperand(O, SlotTracker);
  O << (isInvariantCond() ? " (condition is loop invariant)" : "");
}
#endif

void VPWidenSelectRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());

  // The condition can be loop invariant but still defined inside the
  // loop. This means that we can't just use the original 'cond' value.
  // We have to take the 'vectorized' value and pick the first lane.
  // Instcombine will make this a no-op.
  auto *InvarCond =
      isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
    Value *Op0 = State.get(getOperand(1), Part);
    Value *Op1 = State.get(getOperand(2), Part);
    Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
    State.set(this, Sel, Part);
    State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
  }
}

VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
    const FastMathFlags &FMF) {
  AllowReassoc = FMF.allowReassoc();
  NoNaNs = FMF.noNaNs();
  NoInfs = FMF.noInfs();
  NoSignedZeros = FMF.noSignedZeros();
  AllowReciprocal = FMF.allowReciprocal();
  AllowContract = FMF.allowContract();
  ApproxFunc = FMF.approxFunc();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
  switch (OpType) {
  case OperationType::Cmp:
    O << " " << CmpInst::getPredicateName(getPredicate());
    break;
  case OperationType::DisjointOp:
    if (DisjointFlags.IsDisjoint)
      O << " disjoint";
    break;
  case OperationType::PossiblyExactOp:
    if (ExactFlags.IsExact)
      O << " exact";
    break;
  case OperationType::OverflowingBinOp:
    if (WrapFlags.HasNUW)
      O << " nuw";
    if (WrapFlags.HasNSW)
      O << " nsw";
    break;
  case OperationType::FPMathOp:
    getFastMathFlags().print(O);
    break;
  case OperationType::GEPOp:
    if (GEPFlags.IsInBounds)
      O << " inbounds";
    break;
  case OperationType::NonNegOp:
    if (NonNegFlags.NonNeg)
      O << " nneg";
    break;
  case OperationType::Other:
    break;
  }
  if (getNumOperands() > 0)
    O << " ";
}
#endif

void VPWidenRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  switch (Opcode) {
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Just widen unops and binops.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      SmallVector<Value *, 2> Ops;
      for (VPValue *VPOp : operands())
        Ops.push_back(State.get(VPOp, Part));

      Value *V = Builder.CreateNAryOp(Opcode, Ops);

      if (auto *VecOp = dyn_cast<Instruction>(V))
        setFlags(VecOp);

      // Use this vector value for all users of the original instruction.
      State.set(this, V, Part);
      State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  case Instruction::Freeze: {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *Op = State.get(getOperand(0), Part);

      Value *Freeze = Builder.CreateFreeze(Op);
      State.set(this, Freeze, Part);
    }
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Widen compares. Generate vector compares.
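    // For illustration: a scalar 'icmp slt i32 %a, %b' at VF=4 becomes
    // 'icmp slt <4 x i32> %a.vec, %b.vec', producing a <4 x i1> mask for
    // each unroll part.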
    bool FCmp = Opcode == Instruction::FCmp;
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *B = State.get(getOperand(1), Part);
      Value *C = nullptr;
      if (FCmp) {
        // Propagate fast math flags.
        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
        if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
          Builder.setFastMathFlags(I->getFastMathFlags());
        C = Builder.CreateFCmp(getPredicate(), A, B);
      } else {
        C = Builder.CreateICmp(getPredicate(), A, B);
      }
      State.set(this, C, Part);
      State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  default:
    // This instruction is not vectorized by simple widening.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
                      << Instruction::getOpcodeName(Opcode));
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.

#if !defined(NDEBUG)
  // Verify that VPlan type inference results agree with the type of the
  // generated values.
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    assert(VectorType::get(State.TypeAnalysis.inferScalarType(this),
                           State.VF) == State.get(this, Part)->getType() &&
           "inferred type and type from generated instructions do not match");
  }
#endif
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = " << Instruction::getOpcodeName(Opcode);
  printFlags(O);
  printOperands(O, SlotTracker);
}
#endif

void VPWidenCastRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  /// Vectorize casts.
  assert(State.VF.isVector() && "Not vectorizing?");
  Type *DestTy = VectorType::get(getResultType(), State.VF);
  VPValue *Op = getOperand(0);
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    if (Part > 0 && Op->isLiveIn()) {
      // FIXME: Remove once explicit unrolling is implemented using VPlan.
      State.set(this, State.get(this, 0), Part);
      continue;
    }
    Value *A = State.get(Op, Part);
    Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
    State.set(this, Cast, Part);
    State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CAST ";
  printAsOperand(O, SlotTracker);
  O << " = " << Instruction::getOpcodeName(Opcode) << " ";
  printFlags(O);
  printOperands(O, SlotTracker);
  O << " to " << *getResultType();
}
#endif

/// This function adds
/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
/// to each vector element of Val. The sequence starts at StartIndex.
/// \p Opcode is relevant for FP induction variable.
static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
                            Instruction::BinaryOps BinOp, ElementCount VF,
                            IRBuilderBase &Builder) {
  assert(VF.isVector() && "only vector VFs are supported");

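  // Worked example (illustrative): for an integer induction with VF=4,
  // StartIdx=0 and Step=2, the result is Val + <0, 1, 2, 3> * 2, i.e.
  // Val + <0, 2, 4, 6>.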
  // Create and check the types.
  auto *ValVTy = cast<VectorType>(Val->getType());
  ElementCount VLen = ValVTy->getElementCount();

  Type *STy = Val->getType()->getScalarType();
  assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
         "Induction Step must be an integer or FP");
  assert(Step->getType() == STy && "Step has wrong type");

  SmallVector<Constant *, 8> Indices;

  // Create a vector of consecutive numbers from zero to VF.
  VectorType *InitVecValVTy = ValVTy;
  if (STy->isFloatingPointTy()) {
    Type *InitVecValSTy =
        IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
    InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
  }
  Value *InitVec = Builder.CreateStepVector(InitVecValVTy);

  // Splat the StartIdx
  Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);

  if (STy->isIntegerTy()) {
    InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
    Step = Builder.CreateVectorSplat(VLen, Step);
    assert(Step->getType() == Val->getType() && "Invalid step vec");
    // FIXME: The newly created binary instructions should contain nsw/nuw
    // flags, which can be found from the original scalar operations.
    Step = Builder.CreateMul(InitVec, Step);
    return Builder.CreateAdd(Val, Step, "induction");
  }

  // Floating point induction.
  assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
         "Binary Opcode should be specified for FP induction");
  InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
  InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);

  Step = Builder.CreateVectorSplat(VLen, Step);
  Value *MulOp = Builder.CreateFMul(InitVec, Step);
  return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
}

/// A helper function that returns an integer or floating-point constant with
/// value C.
static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
  return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
                           : ConstantFP::get(Ty, C);
}

static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,
                                  ElementCount VF) {
  assert(FTy->isFloatingPointTy() && "Expected floating point type!");
  Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
  Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
  return B.CreateUIToFP(RuntimeVF, FTy);
}

void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Int or FP induction being replicated.");

  Value *Start = getStartValue()->getLiveInIRValue();
  const InductionDescriptor &ID = getInductionDescriptor();
  TruncInst *Trunc = getTruncInst();
  IRBuilderBase &Builder = State.Builder;
  assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
  assert(State.VF.isVector() && "must have vector VF");

  // The value from the original loop to which we are mapping the new induction
  // variable.
  Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;

  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
    Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());

  // Now do the actual transformations, and start with fetching the step value.
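  // Rough shape of the output (illustrative only, for an integer IV with
  // step 1, VF=4, UF=2):
  //   vector.ph:   %stepped.start = <start+0, start+1, start+2, start+3>
  //   vector.body: %vec.ind       = phi [ %stepped.start, %vector.ph ],
  //                                     [ %vec.ind.next, %vector.body ]
  //                part 1 value   = %vec.ind + <4, 4, 4, 4>
  //                %vec.ind.next  = %vec.ind + <8, 8, 8, 8>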
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));

  assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
         "Expected either an induction phi-node or a truncate of it!");

  // Construct the initial value of the vector IV in the vector loop preheader
  auto CurrIP = Builder.saveIP();
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  Builder.SetInsertPoint(VectorPH->getTerminator());
  if (isa<TruncInst>(EntryVal)) {
    assert(Start->getType()->isIntegerTy() &&
           "Truncation requires an integer type");
    auto *TruncType = cast<IntegerType>(EntryVal->getType());
    Step = Builder.CreateTrunc(Step, TruncType);
    Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
  }

  Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
  Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
  Value *SteppedStart = getStepVector(
      SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);

  // We create vector phi nodes for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (Step->getType()->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = ID.getInductionOpcode();
    MulOp = Instruction::FMul;
  }

  // Multiply the vectorization factor by the step using integer or
  // floating-point arithmetic as appropriate.
  Type *StepType = Step->getType();
  Value *RuntimeVF;
  if (Step->getType()->isFloatingPointTy())
    RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
  else
    RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
  Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);

  // Create a vector splat to use in the induction update.
  //
  // FIXME: If the step is non-constant, we create the vector splat with
  // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
  // handle a constant vector splat.
  Value *SplatVF = isa<Constant>(Mul)
                       ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
                       : Builder.CreateVectorSplat(State.VF, Mul);
  Builder.restoreIP(CurrIP);

  // We may need to add the step a number of times, depending on the unroll
  // factor. The last of those goes into the PHI.
  PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
  VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  VecInd->setDebugLoc(EntryVal->getDebugLoc());
  Instruction *LastInduction = VecInd;
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    State.set(this, LastInduction, Part);

    if (isa<TruncInst>(EntryVal))
      State.addMetadata(LastInduction, EntryVal);

    LastInduction = cast<Instruction>(
        Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
    LastInduction->setDebugLoc(EntryVal->getDebugLoc());
  }

  LastInduction->setName("vec.ind.next");
  VecInd->addIncoming(SteppedStart, VectorPH);
  // Add induction update using an incorrect block temporarily. The phi node
  // will be fixed after VPlan execution. Note that at this point the latch
  // block cannot be used, as it does not exist yet.
  // TODO: Model increment value in VPlan, by turning the recipe into a
  // multi-def and a subclass of VPHeaderPHIRecipe.
  VecInd->addIncoming(LastInduction, VectorPH);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-INDUCTION";
  if (getTruncInst()) {
    O << "\\l\"";
    O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
    O << " +\n" << Indent << "\" ";
    getVPValue(0)->printAsOperand(O, SlotTracker);
  } else
    O << " " << VPlanIngredient(IV);

  O << ", ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
#endif

bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
  // The step may be defined by a recipe in the preheader (e.g. if it requires
  // SCEV expansion), but for the canonical induction the step is required to
  // be 1, which is represented as live-in.
  if (getStepValue()->getDefiningRecipe())
    return false;
  auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
  auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
  auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
  return StartC && StartC->isZero() && StepC && StepC->isOne() &&
         getScalarType() == CanIV->getScalarType();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << Indent << "= DERIVED-IV ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << " + ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << " * ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
#endif

void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());

  /// Compute scalar induction steps. \p ScalarIV is the scalar induction
  /// variable on which to base the steps, \p Step is the size of the step.

  Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));
  IRBuilderBase &Builder = State.Builder;

  // Ensure step has the same type as that of scalar IV.
  Type *BaseIVTy = BaseIV->getType()->getScalarType();
  assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");

  // We build scalar steps for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (BaseIVTy->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = InductionOpcode;
    MulOp = Instruction::FMul;
  }

  // Determine the number of scalars we need to generate for each unroll
  // iteration.
  bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
  // Compute the scalar steps and save the results in State.
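  // Worked example (illustrative): with BaseIV=%iv, Step=1, VF=4 and UF=2,
  // the lanes produced are %iv+0..%iv+3 for part 0 and %iv+4..%iv+7 for
  // part 1; when only the first lane is used, just %iv+0 and %iv+4 are
  // generated.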
  Type *IntStepTy =
      IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
  Type *VecIVTy = nullptr;
  Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
  if (!FirstLaneOnly && State.VF.isScalable()) {
    VecIVTy = VectorType::get(BaseIVTy, State.VF);
    UnitStepVec =
        Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
    SplatStep = Builder.CreateVectorSplat(State.VF, Step);
    SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
  }

  unsigned StartPart = 0;
  unsigned EndPart = State.UF;
  unsigned StartLane = 0;
  unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
  if (State.Instance) {
    StartPart = State.Instance->Part;
    EndPart = StartPart + 1;
    StartLane = State.Instance->Lane.getKnownLane();
    EndLane = StartLane + 1;
  }
  for (unsigned Part = StartPart; Part < EndPart; ++Part) {
    Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);

    if (!FirstLaneOnly && State.VF.isScalable()) {
      auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
      auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
      if (BaseIVTy->isFloatingPointTy())
        InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
      auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
      auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
      State.set(this, Add, Part);
      // It's useful to record the lane values too for the known minimum number
      // of elements so we do those below. This improves the code quality when
      // trying to extract the first element, for example.
    }

    if (BaseIVTy->isFloatingPointTy())
      StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);

    for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
      Value *StartIdx = Builder.CreateBinOp(
          AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
      // The step returned by `createStepForVF` is a runtime-evaluated value
      // when VF is scalable. Otherwise, it should be folded into a Constant.
      assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
             "Expected StartIdx to be folded to a constant when VF is not "
             "scalable");
      auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
      auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
      State.set(this, Add, VPIteration(Part, Lane));
    }
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = SCALAR-STEPS ";
  printOperands(O, SlotTracker);
}
#endif

void VPWidenGEPRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
  // Construct a vector GEP by widening the operands of the scalar GEP as
  // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
  // results in a vector of pointers when at least one operand of the GEP
  // is vector-typed. Thus, to keep the representation compact, we only use
  // vector-typed operands for loop-varying values.
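  // For illustration: 'getelementptr inbounds i32, ptr %base, i64 %iv' with a
  // loop-varying %iv is widened using a <VF x i64> index vector and yields a
  // <VF x ptr> result, whereas an all-invariant GEP is cloned as a scalar and
  // splatted below.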

  if (areAllOperandsInvariant()) {
    // If we are vectorizing, but the GEP has only loop-invariant operands,
    // the GEP we build (by only using vector-typed operands for
    // loop-varying values) would be a scalar pointer. Thus, to ensure we
    // produce a vector of pointers, we need to either arbitrarily pick an
    // operand to broadcast, or broadcast a clone of the original GEP.
    // Here, we broadcast a clone of the original.
    //
    // TODO: If at some point we decide to scalarize instructions having
    // loop-invariant operands, this special case will no longer be
    // required. We would add the scalarization decision to
    // collectLoopScalars() and teach getVectorValue() to broadcast
    // the lane-zero scalar value.
    SmallVector<Value *> Ops;
    for (unsigned I = 0, E = getNumOperands(); I != E; I++)
      Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));

    auto *NewGEP =
        State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
                                ArrayRef(Ops).drop_front(), "", isInBounds());
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
      State.set(this, EntryPart, Part);
      State.addMetadata(EntryPart, GEP);
    }
  } else {
    // If the GEP has at least one loop-varying operand, we are sure to
    // produce a vector of pointers. But if we are only unrolling, we want
    // to produce a scalar GEP for each unroll part. Thus, the GEP we
    // produce with the code below will be scalar (if VF == 1) or vector
    // (otherwise). Note that for the unroll-only case, we still maintain
    // values in the vector mapping with initVector, as we do for other
    // instructions.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // The pointer operand of the new GEP. If it's loop-invariant, we
      // won't broadcast it.
      auto *Ptr = isPointerLoopInvariant()
                      ? State.get(getOperand(0), VPIteration(0, 0))
                      : State.get(getOperand(0), Part);

      // Collect all the indices for the new GEP. If any index is
      // loop-invariant, we won't broadcast it.
      SmallVector<Value *, 4> Indices;
      for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
        VPValue *Operand = getOperand(I);
        if (isIndexLoopInvariant(I - 1))
          Indices.push_back(State.get(Operand, VPIteration(0, 0)));
        else
          Indices.push_back(State.get(Operand, Part));
      }

      // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
      // but it should be a vector, otherwise.
      auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
                                             Indices, "", isInBounds());
      assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
             "NewGEP is not a pointer vector");
      State.set(this, NewGEP, Part);
      State.addMetadata(NewGEP, GEP);
    }
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-GEP ";
  O << (isPointerLoopInvariant() ? "Inv" : "Var");
  for (size_t I = 0; I < getNumOperands() - 1; ++I)
    O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";

  O << " ";
  printAsOperand(O, SlotTracker);
  O << " = getelementptr";
  printFlags(O);
  printOperands(O, SlotTracker);
}
#endif

void VPVectorPointerRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  State.setDebugLocFrom(getDebugLoc());
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    // Calculate the pointer for the specific unroll-part.
    Value *PartPtr = nullptr;
    // Use i32 for the gep index type when the value is constant,
    // or query DataLayout for a more suitable index type otherwise.
    const DataLayout &DL =
        Builder.GetInsertBlock()->getModule()->getDataLayout();
    Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
                        ? DL.getIndexType(IndexedTy->getPointerTo())
                        : Builder.getInt32Ty();
    Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
    bool InBounds = isInBounds();
    if (IsReverse) {
      // If the address is consecutive but reversed, then the
      // wide store needs to start at the last vector element.
      // RunTimeVF = VScale * VF.getKnownMinValue()
      // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
      Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
      // NumElt = -Part * RunTimeVF
      Value *NumElt = Builder.CreateMul(
          ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
      // LastLane = 1 - RunTimeVF
      Value *LastLane =
          Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
      PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
      PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds);
    } else {
      Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
      PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
    }

    State.set(this, PartPtr, Part, /*IsScalar*/ true);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = vector-pointer ";
  if (IsReverse)
    O << "(reverse) ";

  printOperands(O, SlotTracker);
}
#endif

void VPBlendRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  // We know that all PHIs in non-header blocks are converted into
  // selects, so we don't have to worry about the insertion order and we
  // can just use the builder.
  // At this point we generate the predication tree. There may be
  // duplications since this is a simple recursive scan, but future
  // optimizations will clean it up.

  unsigned NumIncoming = getNumIncomingValues();

  // Generate a sequence of selects of the form:
  // SELECT(Mask3, In3,
  //        SELECT(Mask2, In2,
  //               SELECT(Mask1, In1,
  //                      In0)))
  // Note that Mask0 is never used: lanes for which no path reaches this phi
  // and are essentially undef are taken from In0.
  VectorParts Entry(State.UF);
  for (unsigned In = 0; In < NumIncoming; ++In) {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // We might have single edge PHIs (blocks) - use an identity
      // 'select' for the first PHI operand.
      Value *In0 = State.get(getIncomingValue(In), Part);
      if (In == 0)
        Entry[Part] = In0; // Initialize with the first incoming value.
      else {
        // Select between the current value and the previous incoming edge
        // based on the incoming mask.
        Value *Cond = State.get(getMask(In), Part);
        Entry[Part] =
            State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
      }
    }
  }
  for (unsigned Part = 0; Part < State.UF; ++Part)
    State.set(this, Entry[Part], Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "BLEND ";
  printAsOperand(O, SlotTracker);
  O << " =";
  if (getNumIncomingValues() == 1) {
    // Not a User of any mask: not really blending, this is a
    // single-predecessor phi.
    O << " ";
    getIncomingValue(0)->printAsOperand(O, SlotTracker);
  } else {
    for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
      O << " ";
      getIncomingValue(I)->printAsOperand(O, SlotTracker);
      if (I == 0)
        continue;
      O << "/";
      getMask(I)->printAsOperand(O, SlotTracker);
    }
  }
}
#endif

void VPReductionRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Reduction being replicated.");
  Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
  RecurKind Kind = RdxDesc.getRecurrenceKind();
  // Propagate the fast-math flags carried by the underlying instruction.
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *NewVecOp = State.get(getVecOp(), Part);
    if (VPValue *Cond = getCondOp()) {
      Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
      VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
      Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
      Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
                                                  RdxDesc.getFastMathFlags());
      if (State.VF.isVector()) {
        Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
      }

      Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
      NewVecOp = Select;
    }
    Value *NewRed;
    Value *NextInChain;
    if (IsOrdered) {
      if (State.VF.isVector())
        NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
                                        PrevInChain);
      else
        NewRed = State.Builder.CreateBinOp(
            (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
            NewVecOp);
      PrevInChain = NewRed;
    } else {
      PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
      NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
    }
    if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
      NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
                                   NewRed, PrevInChain);
    } else if (IsOrdered)
      NextInChain = NewRed;
    else
      NextInChain = State.Builder.CreateBinOp(
          (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
    State.set(this, NextInChain, Part, /*IsScalar*/ true);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "REDUCE ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  getChainOp()->printAsOperand(O, SlotTracker);
  O << " +";
  if (isa<FPMathOperator>(getUnderlyingInstr()))
    O << getUnderlyingInstr()->getFastMathFlags();
  O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
  getVecOp()->printAsOperand(O, SlotTracker);
  if (getCondOp()) {
    O << ", ";
    getCondOp()->printAsOperand(O, SlotTracker);
  }
  O << ")";
  if (RdxDesc.IntermediateStore)
    O << " (with final reduction value stored in invariant address sank "
         "outside of loop)";
}
#endif

bool VPReplicateRecipe::shouldPack() const {
  // Find if the recipe is used by a widened recipe via an intervening
  // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
  return any_of(users(), [](const VPUser *U) {
    if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
      return any_of(PredR->users(), [PredR](const VPUser *U) {
        return !U->usesScalars(PredR);
      });
    return false;
  });
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");

  if (!getUnderlyingInstr()->getType()->isVoidTy()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }
  if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
    O << "call";
    printFlags(O);
    O << "@" << CB->getCalledFunction()->getName() << "(";
    interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
                    O, [&O, &SlotTracker](VPValue *Op) {
                      Op->printAsOperand(O, SlotTracker);
                    });
    O << ")";
  } else {
    O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode());
    printFlags(O);
    printOperands(O, SlotTracker);
  }

  if (shouldPack())
    O << " (S->V)";
}
#endif

/// Checks if \p C is uniform across all VFs and UFs. It is considered as such
/// if it is either defined outside the vector region or its operand is known to
/// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
/// TODO: Uniformity should be associated with a VPValue and there should be a
/// generic way to check.
static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) {
  return C->isDefinedOutsideVectorRegions() ||
         isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
         isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
}

Value *VPScalarCastRecipe::generate(VPTransformState &State, unsigned Part) {
  assert(vputils::onlyFirstLaneUsed(this) &&
         "Codegen only implemented for first lane.");
  switch (Opcode) {
  case Instruction::SExt:
  case Instruction::ZExt:
  case Instruction::Trunc: {
    // Note: SExt/ZExt not used yet.
    Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
    return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
  }
  default:
    llvm_unreachable("opcode not implemented yet");
  }
}

void VPScalarCastRecipe::execute(VPTransformState &State) {
  bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
  for (unsigned Part = 0; Part != State.UF; ++Part) {
    Value *Res;
    // Only generate a single instance if the recipe is uniform across UFs and
    // VFs.
    if (Part > 0 && IsUniformAcrossVFsAndUFs)
      Res = State.get(this, VPIteration(0, 0));
    else
      Res = generate(State, Part);
    State.set(this, Res, VPIteration(Part, 0));
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPScalarCastRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "SCALAR-CAST ";
  printAsOperand(O, SlotTracker);
  O << " = " << Instruction::getOpcodeName(Opcode) << " ";
  printOperands(O, SlotTracker);
  O << " to " << *ResultTy;
}
#endif

void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Branch on Mask works only on single instance.");

  unsigned Part = State.Instance->Part;
  unsigned Lane = State.Instance->Lane.getKnownLane();

  Value *ConditionBit = nullptr;
  VPValue *BlockInMask = getMask();
  if (BlockInMask) {
    ConditionBit = State.get(BlockInMask, Part);
    if (ConditionBit->getType()->isVectorTy())
      ConditionBit = State.Builder.CreateExtractElement(
          ConditionBit, State.Builder.getInt32(Lane));
  } else // Block in mask is all-one.
    ConditionBit = State.Builder.getTrue();

  // Replace the temporary unreachable terminator with a new conditional branch,
  // whose two destinations will be set later when they are created.
  auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
  assert(isa<UnreachableInst>(CurrentTerminator) &&
         "Expected to replace unreachable terminator with conditional branch.");
  auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
  CondBr->setSuccessor(0, nullptr);
  ReplaceInstWithInst(CurrentTerminator, CondBr);
}

void VPPredInstPHIRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Predicated instruction PHI works per instance.");
  Instruction *ScalarPredInst =
      cast<Instruction>(State.get(getOperand(0), *State.Instance));
  BasicBlock *PredicatedBB = ScalarPredInst->getParent();
  BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
  assert(PredicatingBB && "Predicated block has no single predecessor.");
  assert(isa<VPReplicateRecipe>(getOperand(0)) &&
         "operand must be VPReplicateRecipe");

  // By current pack/unpack logic we need to generate only a single phi node: if
  // a vector value for the predicated instruction exists at this point it means
  // the instruction has vector users only, and a phi for the vector value is
  // needed. In this case the recipe of the predicated instruction is marked to
  // also do that packing, thereby "hoisting" the insert-element sequence.
  // Otherwise, a phi node for the scalar value is needed.
  unsigned Part = State.Instance->Part;
  if (State.hasVectorValue(getOperand(0), Part)) {
    Value *VectorValue = State.get(getOperand(0), Part);
    InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
    PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
    VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
    VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
    if (State.hasVectorValue(this, Part))
      State.reset(this, VPhi, Part);
    else
      State.set(this, VPhi, Part);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), VPhi, Part);
  } else {
    Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
    PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
    Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
                     PredicatingBB);
    Phi->addIncoming(ScalarPredInst, PredicatedBB);
    if (State.hasScalarValue(this, *State.Instance))
      State.reset(this, Phi, *State.Instance);
    else
      State.set(this, Phi, *State.Instance);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), Phi, *State.Instance);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  printOperands(O, SlotTracker);
}

void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = load ";
  printOperands(O, SlotTracker);
}

void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = vp.load ";
  printOperands(O, SlotTracker);
}

void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN store ";
  printOperands(O, SlotTracker);
}

void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN vp.store ";
  printOperands(O, SlotTracker);
}
#endif

void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
  Value *Start = getStartValue()->getLiveInIRValue();
  PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
  EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  EntryPart->addIncoming(Start, VectorPH);
  EntryPart->setDebugLoc(getDebugLoc());
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, EntryPart, Part, /*IsScalar*/ true);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
#endif

bool VPCanonicalIVPHIRecipe::isCanonical(
    InductionDescriptor::InductionKind Kind, VPValue *Start,
    VPValue *Step) const {
  // Must be an integer induction.
  if (Kind != InductionDescriptor::IK_IntInduction)
    return false;
  // Start must match the start value of this canonical induction.
  if (Start != getStartValue())
    return false;

  // If the step is defined by a recipe, it is not a ConstantInt.
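  // The canonical IV always steps by one, so only a live-in step that is the
  // constant 1 can match it.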
  if (Step->getDefiningRecipe())
    return false;

  ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
  return StepC && StepC->isOne();
}

bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
  return IsScalarAfterVectorization &&
         (!IsScalable || vputils::onlyFirstLaneUsed(this));
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-POINTER-INDUCTION ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << ", " << *IndDesc.getStep();
}
#endif

void VPExpandSCEVRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "cannot be used in per-lane");
  const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
  SCEVExpander Exp(SE, DL, "induction");

  Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
                                 &*State.Builder.GetInsertPoint());
  assert(!State.ExpandedSCEVs.contains(Expr) &&
         "Same SCEV expanded multiple times");
  State.ExpandedSCEVs[Expr] = Res;
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, Res, {Part, 0});
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  getVPSingleValue()->printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV " << *Expr;
}
#endif

void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
  Type *STy = CanonicalIV->getType();
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
  ElementCount VF = State.VF;
  Value *VStart = VF.isScalar()
                      ? CanonicalIV
                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    Value *VStep = createStepForVF(Builder, STy, VF, Part);
    if (VF.isVector()) {
      VStep = Builder.CreateVectorSplat(VF, VStep);
      VStep =
          Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
    }
    Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
    State.set(this, CanonicalVectorIV, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
#endif

void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  if (State.VF.isVector()) {
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(VectorPH->getTerminator());
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }

  // Create a phi node for the new recurrence.
  PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
  EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(VectorInit, VectorPH);
  State.set(this, EntryPart, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPReductionPHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;

  // Reductions do not have to start at zero. They can start with
  // any loop invariant values.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
  // this value when we vectorize all of the instructions that use the PHI.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy = ScalarPHI ? StartV->getType()
                          : VectorType::get(StartV->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
    EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part, IsInLoop);
  }

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
    // MinMax and AnyOf reductions have the start value as their identity.
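    // For a scalar phi the start value can be used as-is; for a vector phi it
    // is splatted below so that every lane starts from the start value.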
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }

  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part, IsInLoop);
    // Make sure to add the reduction start value only to the
    // first unroll part.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-REDUCTION-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPWidenPHIRecipe::execute(VPTransformState &State) {
  assert(EnableVPlanNativePath &&
         "Non-native vplans are not expected to have VPWidenPHIRecipes.");

  Value *Op0 = State.get(getOperand(0), 0);
  Type *VecTy = Op0->getType();
  Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
  State.set(this, VecPhi, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-PHI ";

  auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
  // Unless all incoming values are modeled in VPlan, print the original PHI
  // directly.
  // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
  // values as VPValues.
  if (getNumOperands() != OriginalPhi->getNumOperands()) {
    O << VPlanIngredient(OriginalPhi);
    return;
  }

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
// remove VPActiveLaneMaskPHIRecipe.
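// Create the phi that carries the active lane mask for each unroll part in the
// vector loop header. Only the incoming value from the preheader is added
// here; the value coming in over the loop backedge is filled in later, together
// with the other header phis.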
void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    Value *StartMask = State.get(getOperand(0), Part);
    PHINode *EntryPart =
        State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
    EntryPart->addIncoming(StartMask, VectorPH);
    EntryPart->setDebugLoc(getDebugLoc());
    State.set(this, EntryPart, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                      VPSlotTracker &SlotTracker) const {
  O << Indent << "ACTIVE-LANE-MASK-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) {
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization.");
  Value *Start = State.get(getOperand(0), VPIteration(0, 0));
  PHINode *EntryPart =
      State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
  EntryPart->addIncoming(Start, VectorPH);
  EntryPart->setDebugLoc(getDebugLoc());
  State.set(this, EntryPart, 0, /*IsScalar=*/true);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif