//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementations for different VPlan recipes.
///
//===----------------------------------------------------------------------===//

#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>

using namespace llvm;

using VectorParts = SmallVector<Value *, 2>;

namespace llvm {
extern cl::opt<bool> EnableVPlanNativePath;
}

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPInterleaveSC:
    return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return true;
  case VPReplicateSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPWidenCallSC:
    return !cast<VPWidenCallRecipe>(this)
                ->getCalledScalarFunction()
                ->onlyReadsMemory();
  case VPBranchOnMaskSC:
  case VPScalarIVStepsSC:
  case VPPredInstPHISC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
    return true;
  case VPReplicateSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPWidenCallSC:
    return !cast<VPWidenCallRecipe>(this)
                ->getCalledScalarFunction()
                ->onlyWritesMemory();
  case VPBranchOnMaskSC:
  case VPPredInstPHISC:
  case VPScalarIVStepsSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPDerivedIVSC:
  case VPPredInstPHISC:
  case VPScalarCastSC:
    return false;
  case VPInstructionSC:
    switch (cast<VPInstruction>(this)->getOpcode()) {
    case Instruction::Or:
    case Instruction::ICmp:
    case Instruction::Select:
    case VPInstruction::Not:
    case VPInstruction::CalculateTripCountMinusVF:
    case VPInstruction::CanonicalIVIncrementForPart:
    case VPInstruction::ExtractFromEnd:
    case VPInstruction::LogicalAnd:
    case VPInstruction::PtrAdd:
      return false;
    default:
      return true;
    }
  case VPWidenCallSC: {
    Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
    return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
  }
  case VPBlendSC:
  case VPReductionSC:
  case VPScalarIVStepsSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenPointerInductionSC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPInterleaveSC:
    return mayWriteToMemory();
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    assert(
        cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
            mayWriteToMemory() &&
        "mayHaveSideEffects result for ingredient differs from this "
        "implementation");
    return mayWriteToMemory();
  case VPReplicateSC: {
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    return true;
  }
}

void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
  auto Lane = VPLane::getLastLaneForVF(State.VF);
  VPValue *ExitValue = getOperand(0);
  if (vputils::isUniformAfterVectorization(ExitValue))
    Lane = VPLane::getFirstLane();
  VPBasicBlock *MiddleVPBB =
      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
  assert(MiddleVPBB->getNumSuccessors() == 0 &&
         "the middle block must not have any successors");
  BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
  Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
                   MiddleBB);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
  O << "Live-out ";
  getPhi()->printAsOperand(O);
  O << " = ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << "\n";
}
#endif

void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, InsertPos->getIterator());
}

void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);
  BB.insert(this, I);
}

void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
}

void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
}

iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}

void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}

void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}

FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
  assert(OpType == OperationType::FPMathOp &&
         "recipe doesn't have fast math flags");
  FastMathFlags Res;
  Res.setAllowReassoc(FMFs.AllowReassoc);
  Res.setNoNaNs(FMFs.NoNaNs);
  Res.setNoInfs(FMFs.NoInfs);
  Res.setNoSignedZeros(FMFs.NoSignedZeros);
  Res.setAllowReciprocal(FMFs.AllowReciprocal);
  Res.setAllowContract(FMFs.AllowContract);
  Res.setApproxFunc(FMFs.ApproxFunc);
  return Res;
}

VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
                             VPValue *A, VPValue *B, DebugLoc DL,
                             const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
                          Pred, DL),
      Opcode(Opcode), Name(Name.str()) {
  assert(Opcode == Instruction::ICmp &&
         "only ICmp predicates supported at the moment");
}

VPInstruction::VPInstruction(unsigned Opcode,
                             std::initializer_list<VPValue *> Operands,
                             FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
      Opcode(Opcode), Name(Name.str()) {
  // Make sure the VPInstruction is a floating-point operation.
  assert(isFPMathOp() && "this op can't take fast-math flags");
}

bool VPInstruction::doesGeneratePerAllLanes() const {
  return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
}

bool VPInstruction::canGenerateScalarForFirstLane() const {
  if (Instruction::isBinaryOp(getOpcode()))
    return true;
  if (isVectorToScalar())
    return true;
  switch (Opcode) {
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::PtrAdd:
  case VPInstruction::ExplicitVectorLength:
    return true;
  default:
    return false;
  }
}

Value *VPInstruction::generatePerLane(VPTransformState &State,
                                      const VPIteration &Lane) {
  IRBuilderBase &Builder = State.Builder;

  assert(getOpcode() == VPInstruction::PtrAdd &&
         "only PtrAdd opcodes are supported for now");
  return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
                              State.get(getOperand(1), Lane), Name);
}

Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
  IRBuilderBase &Builder = State.Builder;

  if (Instruction::isBinaryOp(getOpcode())) {
    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
    if (Part != 0 && vputils::onlyFirstPartUsed(this))
      return State.get(this, 0, OnlyFirstLaneUsed);

    Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
    Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
    auto *Res =
        Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
    if (auto *I = dyn_cast<Instruction>(Res))
      setFlags(I);
    return Res;
  }

  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0), Part);
    return Builder.CreateNot(A, Name);
  }
  case Instruction::ICmp: {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    return Builder.CreateCmp(getPredicate(), A, B, Name);
  }
  case Instruction::Select: {
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    return Builder.CreateSelect(Cond, Op1, Op2, Name);
  }
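  // Note: lane i of the <VF x i1> produced by @llvm.get.active.lane.mask is
  // true iff VIVElem0 + i < ScalarTC (using unsigned arithmetic), so lanes
  // past the trip count are masked off.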
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop tripcount.
    Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));

    // If this part of the active lane mask is scalar, generate the CMP directly
    // to avoid unnecessary extracts.
    if (State.VF.isScalar())
      return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
                               Name);

    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
                                   {PredTy, ScalarTC->getType()},
                                   {VIVElem0, ScalarTC}, nullptr, Name);
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))

    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy())
      return PartMinus1;
    Value *V2 = State.get(getOperand(1), Part);
    return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
  }
  case VPInstruction::CalculateTripCountMinusVF: {
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    Value *ScalarTC = State.get(getOperand(0), {0, 0});
    Value *Step =
        createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
    Value *Sub = Builder.CreateSub(ScalarTC, Step);
    Value *Cmp =
        Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
    Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
    return Builder.CreateSelect(Cmp, Sub, Zero);
  }
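  // Note: @llvm.experimental.get.vector.length(AVL, VF, /*scalable=*/true)
  // used below returns the number of lanes to process in this iteration: a
  // value that is no larger than AVL and no larger than VF * vscale.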
  case VPInstruction::ExplicitVectorLength: {
    // Compute EVL
    auto GetEVL = [=](VPTransformState &State, Value *AVL) {
      assert(AVL->getType()->isIntegerTy() &&
             "Requested vector length should be an integer.");

      // TODO: Add support for MaxSafeDist for correct loop emission.
      assert(State.VF.isScalable() && "Expected scalable vector factor.");
      Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());

      Value *EVL = State.Builder.CreateIntrinsic(
          State.Builder.getInt32Ty(),
          Intrinsic::experimental_get_vector_length,
          {AVL, VFArg, State.Builder.getTrue()});
      return EVL;
    };
    // TODO: Restructure this code with an explicit remainder loop, vsetvli can
    // be outside of the main loop.
    assert(Part == 0 && "No unrolling expected for predicated vectorization.");
    // Compute VTC - IV as the AVL (requested vector length).
    Value *Index = State.get(getOperand(0), VPIteration(0, 0));
    Value *TripCount = State.get(getOperand(1), VPIteration(0, 0));
    Value *AVL = State.Builder.CreateSub(TripCount, Index);
    Value *EVL = GetEVL(State, AVL);
    return EVL;
  }
  case VPInstruction::CanonicalIVIncrementForPart: {
    auto *IV = State.get(getOperand(0), VPIteration(0, 0));
    if (Part == 0)
      return IV;

    // The canonical IV is incremented by the vectorization factor (num of SIMD
    // elements) times the unroll part.
    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
                             hasNoSignedWrap());
  }
  case VPInstruction::BranchOnCond: {
    if (Part != 0)
      return nullptr;

    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination for exiting blocks now and
    // to forward destination(s) later when they are created.
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);

    if (getParent()->isExiting())
      CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);

    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
  case VPInstruction::BranchOnCount: {
    if (Part != 0)
      return nullptr;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
    Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination (the header) now and to the
    // forward destination (the exit/middle block) later when it is created.
    // Note that CreateCondBr expects a valid BB as first argument, so we need
    // to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
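  // ComputeReductionResult combines the per-part reduction vectors into the
  // final scalar result. As a rough sketch (assuming UF=2 and a plain add
  // reduction), the emitted IR looks like:
  //   %bin.rdx = add <VF x T> %rdx.part1, %rdx.part0
  //   %res = call T @llvm.vector.reduce.add(<VF x T> %bin.rdx)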
  case VPInstruction::ComputeReductionResult: {
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
    // and will be removed by breaking up the recipe further.
    auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
    auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
    // Get its reduction variable descriptor.
    const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();

    RecurKind RK = RdxDesc.getRecurrenceKind();

    VPValue *LoopExitingDef = getOperand(1);
    Type *PhiTy = OrigPhi->getType();
    VectorParts RdxParts(State.UF);
    for (unsigned Part = 0; Part < State.UF; ++Part)
      RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());

    // If the vector reduction can be performed in a smaller type, we truncate
    // then extend the loop exit value to enable InstCombine to evaluate the
    // entire expression in the smaller type.
    // TODO: Handle this in truncateToMinBW.
    if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
      Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
      for (unsigned Part = 0; Part < State.UF; ++Part)
        RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
    }
    // Reduce all of the unrolled parts into a single vector.
    Value *ReducedPartRdx = RdxParts[0];
    unsigned Op = RecurrenceDescriptor::getOpcode(RK);
    if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
      Op = Instruction::Or;

    if (PhiR->isOrdered()) {
      ReducedPartRdx = RdxParts[State.UF - 1];
    } else {
      // Floating-point operations should have some FMF to enable the reduction.
      IRBuilderBase::FastMathFlagGuard FMFG(Builder);
      Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
      for (unsigned Part = 1; Part < State.UF; ++Part) {
        Value *RdxPart = RdxParts[Part];
        if (Op != Instruction::ICmp && Op != Instruction::FCmp)
          ReducedPartRdx = Builder.CreateBinOp(
              (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
        else
          ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
      }
    }

    // Create the reduction after the loop. Note that inloop reductions create
    // the target reduction in the loop using a Reduction recipe.
    if ((State.VF.isVector() ||
         RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
        !PhiR->isInLoop()) {
      ReducedPartRdx =
          createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
      // If the reduction can be performed in a smaller type, we need to extend
      // the reduction to the wider type before we branch to the original loop.
      if (PhiTy != RdxDesc.getRecurrenceType())
        ReducedPartRdx = RdxDesc.isSigned()
                             ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
                             : Builder.CreateZExt(ReducedPartRdx, PhiTy);
    }

    // If there were stores of the reduction value to a uniform memory address
    // inside the loop, create the final store here.
    if (StoreInst *SI = RdxDesc.IntermediateStore) {
      auto *NewSI = Builder.CreateAlignedStore(
          ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
      propagateMetadata(NewSI, SI);
    }

    return ReducedPartRdx;
  }
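  // ExtractFromEnd extracts the lane Offset positions from the end of the
  // vectorized operand, e.g. with VF=4, UF=1 and Offset=1 it extracts lane 3
  // of the single part. This is used, for example, for values live out of the
  // loop.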
  case VPInstruction::ExtractFromEnd: {
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
    unsigned Offset = CI->getZExtValue();
    assert(Offset > 0 && "Offset from end must be positive");
    Value *Res;
    if (State.VF.isVector()) {
      assert(Offset <= State.VF.getKnownMinValue() &&
             "invalid offset to extract from");
      // Extract lane VF - Offset from the operand.
      Res = State.get(
          getOperand(0),
          VPIteration(State.UF - 1, VPLane::getLaneFromEnd(State.VF, Offset)));
    } else {
      assert(Offset <= State.UF && "invalid offset to extract from");
      // When loop is unrolled without vectorizing, retrieve UF - Offset.
      Res = State.get(getOperand(0), State.UF - Offset);
    }
    Res->setName(Name);
    return Res;
  }
  case VPInstruction::LogicalAnd: {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    return Builder.CreateLogicalAnd(A, B, Name);
  }
  case VPInstruction::PtrAdd: {
    assert(vputils::onlyFirstLaneUsed(this) &&
           "can only generate first lane for PtrAdd");
    Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
    Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
    return Builder.CreatePtrAdd(Ptr, Addend, Name);
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}

bool VPInstruction::isVectorToScalar() const {
  return getOpcode() == VPInstruction::ExtractFromEnd ||
         getOpcode() == VPInstruction::ComputeReductionResult;
}

#if !defined(NDEBUG)
bool VPInstruction::isFPMathOp() const {
  // Inspired by FPMathOperator::classof. Notable differences are that we don't
  // support Call and PHI opcodes here yet.
  return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
         Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
         Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
         Opcode == Instruction::FCmp || Opcode == Instruction::Select;
}
#endif

void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Instance && "VPInstruction executing an Instance");
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  assert((hasFastMathFlags() == isFPMathOp() ||
          getOpcode() == Instruction::Select) &&
         "Recipe not a FPMathOp but has fast-math flags?");
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());
  State.setDebugLocFrom(getDebugLoc());
  bool GeneratesPerFirstLaneOnly =
      canGenerateScalarForFirstLane() &&
      (vputils::onlyFirstLaneUsed(this) || isVectorToScalar());
  bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    if (GeneratesPerAllLanes) {
      for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
           Lane != NumLanes; ++Lane) {
        Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
        assert(GeneratedValue && "generatePerLane must produce a value");
        State.set(this, GeneratedValue, VPIteration(Part, Lane));
      }
      continue;
    }

    Value *GeneratedValue = generatePerPart(State, Part);
    if (!hasResult())
      continue;
    assert(GeneratedValue && "generatePerPart must produce a value");
    assert((GeneratedValue->getType()->isVectorTy() ==
                !GeneratesPerFirstLaneOnly ||
            State.VF.isScalar()) &&
           "scalar value but not only first lane defined");
    State.set(this, GeneratedValue, Part,
              /*IsScalar*/ GeneratesPerFirstLaneOnly);
  }
}

bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  if (Instruction::isBinaryOp(getOpcode()))
    return vputils::onlyFirstLaneUsed(this);

  switch (getOpcode()) {
  default:
    return false;
  case Instruction::ICmp:
  case VPInstruction::PtrAdd:
    // TODO: Cover additional opcodes.
    return vputils::onlyFirstLaneUsed(this);
  case VPInstruction::ActiveLaneMask:
  case VPInstruction::ExplicitVectorLength:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::BranchOnCount:
    return true;
  }
  llvm_unreachable("switch should return");
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
  print(dbgs(), "", SlotTracker);
}

void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::ExplicitVectorLength:
    O << "EXPLICIT-VECTOR-LENGTH";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::CalculateTripCountMinusVF:
    O << "TC > VF ? TC - VF : 0";
    break;
  case VPInstruction::CanonicalIVIncrementForPart:
    O << "VF * Part +";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count";
    break;
  case VPInstruction::ExtractFromEnd:
    O << "extract-from-end";
    break;
  case VPInstruction::ComputeReductionResult:
    O << "compute-reduction-result";
    break;
  case VPInstruction::LogicalAnd:
    O << "logical-and";
    break;
  case VPInstruction::PtrAdd:
    O << "ptradd";
    break;
  default:
    O << Instruction::getOpcodeName(getOpcode());
  }

  printFlags(O);
  printOperands(O, SlotTracker);

  if (auto DL = getDebugLoc()) {
    O << ", !dbg ";
    DL.print(O);
  }
}
#endif

void VPWidenCallRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  Function *CalledScalarFn = getCalledScalarFunction();
  assert(!isDbgInfoIntrinsic(CalledScalarFn->getIntrinsicID()) &&
         "DbgInfoIntrinsic should have been dropped during VPlan construction");
  State.setDebugLocFrom(getDebugLoc());

  bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
  FunctionType *VFTy = nullptr;
  if (Variant)
    VFTy = Variant->getFunctionType();
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    SmallVector<Type *, 2> TysForDecl;
    // Add return type if intrinsic is overloaded on it.
    if (UseIntrinsic &&
        isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
      TysForDecl.push_back(VectorType::get(
          CalledScalarFn->getReturnType()->getScalarType(), State.VF));
    SmallVector<Value *, 4> Args;
    for (const auto &I : enumerate(arg_operands())) {
      // Some intrinsics have a scalar argument - don't replace it with a
      // vector.
      Value *Arg;
      if (UseIntrinsic &&
          isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
        Arg = State.get(I.value(), VPIteration(0, 0));
      // Some vectorized function variants may also take a scalar argument,
      // e.g. linear parameters for pointers. This needs to be the scalar value
      // from the start of the respective part when interleaving.
      else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
        Arg = State.get(I.value(), VPIteration(Part, 0));
      else
        Arg = State.get(I.value(), Part);
      if (UseIntrinsic &&
          isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
        TysForDecl.push_back(Arg->getType());
      Args.push_back(Arg);
    }

    Function *VectorF;
    if (UseIntrinsic) {
      // Use vector version of the intrinsic.
      Module *M = State.Builder.GetInsertBlock()->getModule();
      VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
      assert(VectorF && "Can't retrieve vector intrinsic.");
    } else {
#ifndef NDEBUG
      assert(Variant != nullptr && "Can't create vector function.");
#endif
      VectorF = Variant;
    }

    auto *CI = cast_or_null<CallInst>(getUnderlyingInstr());
    SmallVector<OperandBundleDef, 1> OpBundles;
    if (CI)
      CI->getOperandBundlesAsDefs(OpBundles);

    CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);

    if (isa<FPMathOperator>(V))
      V->copyFastMathFlags(CI);

    if (!V->getType()->isVoidTy())
      State.set(this, V, Part);
    State.addMetadata(V, CI);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  Function *CalledFn = getCalledScalarFunction();
  if (CalledFn->getReturnType()->isVoidTy())
    O << "void ";
  else {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  O << "call @" << CalledFn->getName() << "(";
  interleaveComma(arg_operands(), O, [&O, &SlotTracker](VPValue *Op) {
    Op->printAsOperand(O, SlotTracker);
  });
  O << ")";

  if (VectorIntrinsicID)
    O << " (using vector intrinsic)";
  else {
    O << " (using library function";
    if (Variant->hasName())
      O << ": " << Variant->getName();
    O << ")";
  }
}

void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-SELECT ";
  printAsOperand(O, SlotTracker);
  O << " = select ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(2)->printAsOperand(O, SlotTracker);
  O << (isInvariantCond() ? " (condition is loop invariant)" : "");
}
#endif

void VPWidenSelectRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());

  // The condition can be loop invariant but still defined inside the
  // loop. This means that we can't just use the original 'cond' value.
  // We have to take the 'vectorized' value and pick the first lane.
  // Instcombine will make this a no-op.
  auto *InvarCond =
      isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
    Value *Op0 = State.get(getOperand(1), Part);
    Value *Op1 = State.get(getOperand(2), Part);
    Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
    State.set(this, Sel, Part);
    State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
  }
}

VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
    const FastMathFlags &FMF) {
  AllowReassoc = FMF.allowReassoc();
  NoNaNs = FMF.noNaNs();
  NoInfs = FMF.noInfs();
  NoSignedZeros = FMF.noSignedZeros();
  AllowReciprocal = FMF.allowReciprocal();
  AllowContract = FMF.allowContract();
  ApproxFunc = FMF.approxFunc();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
  switch (OpType) {
  case OperationType::Cmp:
    O << " " << CmpInst::getPredicateName(getPredicate());
    break;
  case OperationType::DisjointOp:
    if (DisjointFlags.IsDisjoint)
      O << " disjoint";
    break;
  case OperationType::PossiblyExactOp:
    if (ExactFlags.IsExact)
      O << " exact";
    break;
  case OperationType::OverflowingBinOp:
    if (WrapFlags.HasNUW)
      O << " nuw";
    if (WrapFlags.HasNSW)
      O << " nsw";
    break;
  case OperationType::FPMathOp:
    getFastMathFlags().print(O);
    break;
  case OperationType::GEPOp:
    if (GEPFlags.IsInBounds)
      O << " inbounds";
    break;
  case OperationType::NonNegOp:
    if (NonNegFlags.NonNeg)
      O << " nneg";
    break;
  case OperationType::Other:
    break;
  }
  if (getNumOperands() > 0)
    O << " ";
}
#endif

void VPWidenRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  switch (Opcode) {
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Just widen unops and binops.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      SmallVector<Value *, 2> Ops;
      for (VPValue *VPOp : operands())
        Ops.push_back(State.get(VPOp, Part));

      Value *V = Builder.CreateNAryOp(Opcode, Ops);

      if (auto *VecOp = dyn_cast<Instruction>(V))
        setFlags(VecOp);

      // Use this vector value for all users of the original instruction.
      State.set(this, V, Part);
      State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  case Instruction::Freeze: {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *Op = State.get(getOperand(0), Part);

      Value *Freeze = Builder.CreateFreeze(Op);
      State.set(this, Freeze, Part);
    }
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Widen compares. Generate vector compares.
    bool FCmp = Opcode == Instruction::FCmp;
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *B = State.get(getOperand(1), Part);
      Value *C = nullptr;
      if (FCmp) {
        // Propagate fast math flags.
        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
        if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
          Builder.setFastMathFlags(I->getFastMathFlags());
        C = Builder.CreateFCmp(getPredicate(), A, B);
      } else {
        C = Builder.CreateICmp(getPredicate(), A, B);
      }
      State.set(this, C, Part);
      State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  default:
    // This instruction is not vectorized by simple widening.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode: "
                      << Instruction::getOpcodeName(Opcode));
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.

#if !defined(NDEBUG)
  // Verify that VPlan type inference results agree with the type of the
  // generated values.
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    assert(VectorType::get(State.TypeAnalysis.inferScalarType(this),
                           State.VF) == State.get(this, Part)->getType() &&
           "inferred type and type from generated instructions do not match");
  }
#endif
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = " << Instruction::getOpcodeName(Opcode);
  printFlags(O);
  printOperands(O, SlotTracker);
}
#endif

void VPWidenCastRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  /// Vectorize casts.
  assert(State.VF.isVector() && "Not vectorizing?");
  Type *DestTy = VectorType::get(getResultType(), State.VF);
  VPValue *Op = getOperand(0);
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    if (Part > 0 && Op->isLiveIn()) {
      // FIXME: Remove once explicit unrolling is implemented using VPlan.
      State.set(this, State.get(this, 0), Part);
      continue;
    }
    Value *A = State.get(Op, Part);
    Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
    State.set(this, Cast, Part);
    State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CAST ";
  printAsOperand(O, SlotTracker);
  O << " = " << Instruction::getOpcodeName(Opcode) << " ";
  printFlags(O);
  printOperands(O, SlotTracker);
  O << " to " << *getResultType();
}
#endif

/// This function adds
/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
/// to each vector element of Val. The sequence starts at StartIdx.
/// \p BinOp is relevant for FP induction variable.
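/// For example (illustrative), for an integer induction with VF = 4,
/// StartIdx = 0 and Val = splat(%start), the result is the vector
/// <%start, %start + Step, %start + 2 * Step, %start + 3 * Step>.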
static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
                            Instruction::BinaryOps BinOp, ElementCount VF,
                            IRBuilderBase &Builder) {
  assert(VF.isVector() && "only vector VFs are supported");

  // Create and check the types.
  auto *ValVTy = cast<VectorType>(Val->getType());
  ElementCount VLen = ValVTy->getElementCount();

  Type *STy = Val->getType()->getScalarType();
  assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
         "Induction Step must be an integer or FP");
  assert(Step->getType() == STy && "Step has wrong type");

  SmallVector<Constant *, 8> Indices;

  // Create a vector of consecutive numbers from zero to VF.
  VectorType *InitVecValVTy = ValVTy;
  if (STy->isFloatingPointTy()) {
    Type *InitVecValSTy =
        IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
    InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
  }
  Value *InitVec = Builder.CreateStepVector(InitVecValVTy);

  // Splat the StartIdx
  Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);

  if (STy->isIntegerTy()) {
    InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
    Step = Builder.CreateVectorSplat(VLen, Step);
    assert(Step->getType() == Val->getType() && "Invalid step vec");
    // FIXME: The newly created binary instructions should contain nsw/nuw
    // flags, which can be found from the original scalar operations.
    Step = Builder.CreateMul(InitVec, Step);
    return Builder.CreateAdd(Val, Step, "induction");
  }

  // Floating point induction.
  assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
         "Binary Opcode should be specified for FP induction");
  InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
  InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);

  Step = Builder.CreateVectorSplat(VLen, Step);
  Value *MulOp = Builder.CreateFMul(InitVec, Step);
  return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
}

/// A helper function that returns an integer or floating-point constant with
/// value C.
static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
  return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
                           : ConstantFP::get(Ty, C);
}

static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,
                                  ElementCount VF) {
  assert(FTy->isFloatingPointTy() && "Expected floating point type!");
  Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
  Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
  return B.CreateUIToFP(RuntimeVF, FTy);
}

void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Int or FP induction being replicated.");

  Value *Start = getStartValue()->getLiveInIRValue();
  const InductionDescriptor &ID = getInductionDescriptor();
  TruncInst *Trunc = getTruncInst();
  IRBuilderBase &Builder = State.Builder;
  assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
  assert(State.VF.isVector() && "must have vector VF");

  // The value from the original loop to which we are mapping the new induction
  // variable.
  Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;

  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
    Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());

  // Now do the actual transformations, and start with fetching the step value.
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));

  assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
         "Expected either an induction phi-node or a truncate of it!");

  // Construct the initial value of the vector IV in the vector loop preheader
  auto CurrIP = Builder.saveIP();
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  Builder.SetInsertPoint(VectorPH->getTerminator());
  if (isa<TruncInst>(EntryVal)) {
    assert(Start->getType()->isIntegerTy() &&
           "Truncation requires an integer type");
    auto *TruncType = cast<IntegerType>(EntryVal->getType());
    Step = Builder.CreateTrunc(Step, TruncType);
    Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
  }

  Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
  Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
  Value *SteppedStart = getStepVector(
      SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);

  // We create vector phi nodes for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (Step->getType()->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = ID.getInductionOpcode();
    MulOp = Instruction::FMul;
  }

  // Multiply the vectorization factor by the step using integer or
  // floating-point arithmetic as appropriate.
  Type *StepType = Step->getType();
  Value *RuntimeVF;
  if (Step->getType()->isFloatingPointTy())
    RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
  else
    RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
  Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);

  // Create a vector splat to use in the induction update.
  //
  // FIXME: If the step is non-constant, we create the vector splat with
  //        IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
  //        handle a constant vector splat.
  Value *SplatVF = isa<Constant>(Mul)
                       ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
                       : Builder.CreateVectorSplat(State.VF, Mul);
  Builder.restoreIP(CurrIP);

  // We may need to add the step a number of times, depending on the unroll
  // factor. The last of those goes into the PHI.
  PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
  VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  VecInd->setDebugLoc(EntryVal->getDebugLoc());
  Instruction *LastInduction = VecInd;
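  // Illustrative layout of the per-part values: part P holds
  // SteppedStart + P * (VF * Step), i.e. part 0 is the phi itself and each
  // subsequent part adds SplatVF once more.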
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    State.set(this, LastInduction, Part);

    if (isa<TruncInst>(EntryVal))
      State.addMetadata(LastInduction, EntryVal);

    LastInduction = cast<Instruction>(
        Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
    LastInduction->setDebugLoc(EntryVal->getDebugLoc());
  }

  LastInduction->setName("vec.ind.next");
  VecInd->addIncoming(SteppedStart, VectorPH);
  // Add induction update using an incorrect block temporarily. The phi node
  // will be fixed after VPlan execution. Note that at this point the latch
  // block cannot be used, as it does not exist yet.
  // TODO: Model increment value in VPlan, by turning the recipe into a
  // multi-def and a subclass of VPHeaderPHIRecipe.
  VecInd->addIncoming(LastInduction, VectorPH);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-INDUCTION";
  if (getTruncInst()) {
    O << "\\l\"";
    O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
    O << " +\n" << Indent << "\" ";
    getVPValue(0)->printAsOperand(O, SlotTracker);
  } else
    O << " " << VPlanIngredient(IV);

  O << ", ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
#endif

bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
  // The step may be defined by a recipe in the preheader (e.g. if it requires
  // SCEV expansion), but for the canonical induction the step is required to
  // be 1, which is represented as live-in.
  if (getStepValue()->getDefiningRecipe())
    return false;
  auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
  auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
  auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
  return StartC && StartC->isZero() && StepC && StepC->isOne() &&
         getScalarType() == CanIV->getScalarType();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = DERIVED-IV ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << " + ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << " * ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
#endif

void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());

  // Compute scalar induction steps. BaseIV is the scalar induction variable on
  // which to base the steps, Step is the size of the step.

  Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));
  IRBuilderBase &Builder = State.Builder;

  // Ensure step has the same type as that of scalar IV.
  Type *BaseIVTy = BaseIV->getType()->getScalarType();
  assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");

  // We build scalar steps for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (BaseIVTy->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = InductionOpcode;
    MulOp = Instruction::FMul;
  }

  // Determine the number of scalars we need to generate for each unroll
  // iteration.
  bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
  // Compute the scalar steps and save the results in State.
  Type *IntStepTy =
      IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
  Type *VecIVTy = nullptr;
  Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
  if (!FirstLaneOnly && State.VF.isScalable()) {
    VecIVTy = VectorType::get(BaseIVTy, State.VF);
    UnitStepVec =
        Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
    SplatStep = Builder.CreateVectorSplat(State.VF, Step);
    SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
  }

  unsigned StartPart = 0;
  unsigned EndPart = State.UF;
  unsigned StartLane = 0;
  unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
  if (State.Instance) {
    StartPart = State.Instance->Part;
    EndPart = StartPart + 1;
    StartLane = State.Instance->Lane.getKnownLane();
    EndLane = StartLane + 1;
  }
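  // Illustrative formula for the generated steps (integer case; FP inductions
  // use AddOp/MulOp instead): lane L of part P receives
  //   BaseIV + (P * VF + L) * Step
  // with the scalable-vector path below producing all lanes of a part at once.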
  for (unsigned Part = StartPart; Part < EndPart; ++Part) {
    Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);

    if (!FirstLaneOnly && State.VF.isScalable()) {
      auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
      auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
      if (BaseIVTy->isFloatingPointTy())
        InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
      auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
      auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
      State.set(this, Add, Part);
      // It's useful to record the lane values too for the known minimum number
      // of elements so we do those below. This improves the code quality when
      // trying to extract the first element, for example.
    }

    if (BaseIVTy->isFloatingPointTy())
      StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);

    for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
      Value *StartIdx = Builder.CreateBinOp(
          AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
      // The step returned by `createStepForVF` is a runtime-evaluated value
      // when VF is scalable. Otherwise, it should be folded into a Constant.
      assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
             "Expected StartIdx to be folded to a constant when VF is not "
             "scalable");
      auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
      auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
      State.set(this, Add, VPIteration(Part, Lane));
    }
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = SCALAR-STEPS ";
  printOperands(O, SlotTracker);
}
#endif

void VPWidenGEPRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
  // Construct a vector GEP by widening the operands of the scalar GEP as
  // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
  // results in a vector of pointers when at least one operand of the GEP
  // is vector-typed. Thus, to keep the representation compact, we only use
  // vector-typed operands for loop-varying values.

  if (areAllOperandsInvariant()) {
    // If we are vectorizing, but the GEP has only loop-invariant operands,
    // the GEP we build (by only using vector-typed operands for
    // loop-varying values) would be a scalar pointer. Thus, to ensure we
    // produce a vector of pointers, we need to either arbitrarily pick an
    // operand to broadcast, or broadcast a clone of the original GEP.
    // Here, we broadcast a clone of the original.
    //
    // TODO: If at some point we decide to scalarize instructions having
    //       loop-invariant operands, this special case will no longer be
    //       required. We would add the scalarization decision to
    //       collectLoopScalars() and teach getVectorValue() to broadcast
    //       the lane-zero scalar value.
    SmallVector<Value *> Ops;
    for (unsigned I = 0, E = getNumOperands(); I != E; I++)
      Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));

    auto *NewGEP =
        State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
                                ArrayRef(Ops).drop_front(), "", isInBounds());
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
      State.set(this, EntryPart, Part);
      State.addMetadata(EntryPart, GEP);
    }
  } else {
    // If the GEP has at least one loop-varying operand, we are sure to
    // produce a vector of pointers. But if we are only unrolling, we want
    // to produce a scalar GEP for each unroll part. Thus, the GEP we
    // produce with the code below will be scalar (if VF == 1) or vector
    // (otherwise). Note that for the unroll-only case, we still maintain
    // values in the vector mapping with initVector, as we do for other
    // instructions.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // The pointer operand of the new GEP. If it's loop-invariant, we
      // won't broadcast it.
      auto *Ptr = isPointerLoopInvariant()
                      ? State.get(getOperand(0), VPIteration(0, 0))
                      : State.get(getOperand(0), Part);

      // Collect all the indices for the new GEP. If any index is
      // loop-invariant, we won't broadcast it.
      SmallVector<Value *, 4> Indices;
      for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
        VPValue *Operand = getOperand(I);
        if (isIndexLoopInvariant(I - 1))
          Indices.push_back(State.get(Operand, VPIteration(0, 0)));
        else
          Indices.push_back(State.get(Operand, Part));
      }

      // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
      // but it should be a vector, otherwise.
      auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
                                             Indices, "", isInBounds());
      assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
             "NewGEP is not a pointer vector");
      State.set(this, NewGEP, Part);
      State.addMetadata(NewGEP, GEP);
    }
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-GEP ";
  O << (isPointerLoopInvariant() ? "Inv" : "Var");
  for (size_t I = 0; I < getNumOperands() - 1; ++I)
    O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";

  O << " ";
  printAsOperand(O, SlotTracker);
  O << " = getelementptr";
  printFlags(O);
  printOperands(O, SlotTracker);
}
#endif

void VPVectorPointerRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  State.setDebugLocFrom(getDebugLoc());
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    // Calculate the pointer for the specific unroll-part.
    Value *PartPtr = nullptr;
    // Use i32 for the gep index type when the value is constant,
    // or query DataLayout for a more suitable index type otherwise.
    const DataLayout &DL =
        Builder.GetInsertBlock()->getModule()->getDataLayout();
    Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
                        ? DL.getIndexType(IndexedTy->getPointerTo())
                        : Builder.getInt32Ty();
    Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
    bool InBounds = isInBounds();
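    // Illustrative example for the reverse case, assuming fixed-width VF = 4
    // (so RunTimeVF = 4): part 0 computes Ptr[-3] and part 1 computes Ptr[-7],
    // so each wide access covers the VF lanes that end at the current
    // position, in reverse order.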
"Inv" : "Var") << "]"; 1461 1462 O << " "; 1463 printAsOperand(O, SlotTracker); 1464 O << " = getelementptr"; 1465 printFlags(O); 1466 printOperands(O, SlotTracker); 1467 } 1468 #endif 1469 1470 void VPVectorPointerRecipe ::execute(VPTransformState &State) { 1471 auto &Builder = State.Builder; 1472 State.setDebugLocFrom(getDebugLoc()); 1473 for (unsigned Part = 0; Part < State.UF; ++Part) { 1474 // Calculate the pointer for the specific unroll-part. 1475 Value *PartPtr = nullptr; 1476 // Use i32 for the gep index type when the value is constant, 1477 // or query DataLayout for a more suitable index type otherwise. 1478 const DataLayout &DL = 1479 Builder.GetInsertBlock()->getModule()->getDataLayout(); 1480 Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0) 1481 ? DL.getIndexType(IndexedTy->getPointerTo()) 1482 : Builder.getInt32Ty(); 1483 Value *Ptr = State.get(getOperand(0), VPIteration(0, 0)); 1484 bool InBounds = isInBounds(); 1485 if (IsReverse) { 1486 // If the address is consecutive but reversed, then the 1487 // wide store needs to start at the last vector element. 1488 // RunTimeVF = VScale * VF.getKnownMinValue() 1489 // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue() 1490 Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF); 1491 // NumElt = -Part * RunTimeVF 1492 Value *NumElt = Builder.CreateMul( 1493 ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF); 1494 // LastLane = 1 - RunTimeVF 1495 Value *LastLane = 1496 Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF); 1497 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds); 1498 PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds); 1499 } else { 1500 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part); 1501 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds); 1502 } 1503 1504 State.set(this, PartPtr, Part, /*IsScalar*/ true); 1505 } 1506 } 1507 1508 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1509 void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent, 1510 VPSlotTracker &SlotTracker) const { 1511 O << Indent; 1512 printAsOperand(O, SlotTracker); 1513 O << " = vector-pointer "; 1514 if (IsReverse) 1515 O << "(reverse) "; 1516 1517 printOperands(O, SlotTracker); 1518 } 1519 #endif 1520 1521 void VPBlendRecipe::execute(VPTransformState &State) { 1522 State.setDebugLocFrom(getDebugLoc()); 1523 // We know that all PHIs in non-header blocks are converted into 1524 // selects, so we don't have to worry about the insertion order and we 1525 // can just use the builder. 1526 // At this point we generate the predication tree. There may be 1527 // duplications since this is a simple recursive scan, but future 1528 // optimizations will clean it up. 1529 1530 unsigned NumIncoming = getNumIncomingValues(); 1531 1532 // Generate a sequence of selects of the form: 1533 // SELECT(Mask3, In3, 1534 // SELECT(Mask2, In2, 1535 // SELECT(Mask1, In1, 1536 // In0))) 1537 // Note that Mask0 is never used: lanes for which no path reaches this phi and 1538 // are essentially undef are taken from In0. 1539 VectorParts Entry(State.UF); 1540 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); 1541 for (unsigned In = 0; In < NumIncoming; ++In) { 1542 for (unsigned Part = 0; Part < State.UF; ++Part) { 1543 // We might have single edge PHIs (blocks) - use an identity 1544 // 'select' for the first PHI operand. 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = vector-pointer ";
  if (IsReverse)
    O << "(reverse) ";

  printOperands(O, SlotTracker);
}
#endif

void VPBlendRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  // We know that all PHIs in non-header blocks are converted into selects, so
  // we don't have to worry about the insertion order and we can just use the
  // builder. At this point we generate the predication tree. There may be
  // duplications since this is a simple recursive scan, but future
  // optimizations will clean it up.

  unsigned NumIncoming = getNumIncomingValues();

  // Generate a sequence of selects of the form:
  // SELECT(Mask3, In3,
  //        SELECT(Mask2, In2,
  //               SELECT(Mask1, In1,
  //                      In0)))
  // Note that Mask0 is never used: lanes for which no path reaches this phi,
  // and which are essentially undef, are taken from In0.
  VectorParts Entry(State.UF);
  bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
  for (unsigned In = 0; In < NumIncoming; ++In) {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // We might have single-edge PHIs (blocks): use an identity
      // 'select' for the first PHI operand.
      Value *In0 = State.get(getIncomingValue(In), Part, OnlyFirstLaneUsed);
      if (In == 0)
        Entry[Part] = In0; // Initialize with the first incoming value.
      else {
        // Select between the current value and the previous incoming edge
        // based on the incoming mask.
        Value *Cond = State.get(getMask(In), Part, OnlyFirstLaneUsed);
        Entry[Part] =
            State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
      }
    }
  }
  for (unsigned Part = 0; Part < State.UF; ++Part)
    State.set(this, Entry[Part], Part, OnlyFirstLaneUsed);
}
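// Illustration only: for two masked incoming values and VF=4, the loop above
// emits a single select per part, roughly
//   %predphi = select <4 x i1> %mask1, <4 x i32> %in1, <4 x i32> %in0
// with deeper nesting for additional incoming values.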
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "BLEND ";
  printAsOperand(O, SlotTracker);
  O << " =";
  if (getNumIncomingValues() == 1) {
    // Not a User of any mask: not really blending, this is a
    // single-predecessor phi.
    O << " ";
    getIncomingValue(0)->printAsOperand(O, SlotTracker);
  } else {
    for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
      O << " ";
      getIncomingValue(I)->printAsOperand(O, SlotTracker);
      if (I == 0)
        continue;
      O << "/";
      getMask(I)->printAsOperand(O, SlotTracker);
    }
  }
}
#endif

void VPReductionRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Reduction being replicated.");
  Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
  RecurKind Kind = RdxDesc.getRecurrenceKind();
  // Propagate the fast-math flags carried by the underlying instruction.
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *NewVecOp = State.get(getVecOp(), Part);
    if (VPValue *Cond = getCondOp()) {
      Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
      VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
      Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
      Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
                                                  RdxDesc.getFastMathFlags());
      if (State.VF.isVector()) {
        Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
      }

      Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
      NewVecOp = Select;
    }
    Value *NewRed;
    Value *NextInChain;
    if (IsOrdered) {
      if (State.VF.isVector())
        NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
                                        PrevInChain);
      else
        NewRed = State.Builder.CreateBinOp(
            (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
            NewVecOp);
      PrevInChain = NewRed;
    } else {
      PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
      NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
    }
    if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
      NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
                                   NewRed, PrevInChain);
    } else if (IsOrdered)
      NextInChain = NewRed;
    else
      NextInChain = State.Builder.CreateBinOp(
          (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
    State.set(this, NextInChain, Part, /*IsScalar*/ true);
  }
}
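// Illustration only: for a conditional floating-point add reduction with
// VF=4, the select above substitutes the reduction identity (e.g. -0.0 for a
// plain fadd) into the masked-off lanes before reducing, roughly:
//   %sel = select <4 x i1> %cond, <4 x float> %vec,
//                 <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>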
"CLONE " : "REPLICATE "); 1673 1674 if (!getUnderlyingInstr()->getType()->isVoidTy()) { 1675 printAsOperand(O, SlotTracker); 1676 O << " = "; 1677 } 1678 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) { 1679 O << "call"; 1680 printFlags(O); 1681 O << "@" << CB->getCalledFunction()->getName() << "("; 1682 interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)), 1683 O, [&O, &SlotTracker](VPValue *Op) { 1684 Op->printAsOperand(O, SlotTracker); 1685 }); 1686 O << ")"; 1687 } else { 1688 O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()); 1689 printFlags(O); 1690 printOperands(O, SlotTracker); 1691 } 1692 1693 if (shouldPack()) 1694 O << " (S->V)"; 1695 } 1696 #endif 1697 1698 /// Checks if \p C is uniform across all VFs and UFs. It is considered as such 1699 /// if it is either defined outside the vector region or its operand is known to 1700 /// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI). 1701 /// TODO: Uniformity should be associated with a VPValue and there should be a 1702 /// generic way to check. 1703 static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) { 1704 return C->isDefinedOutsideVectorRegions() || 1705 isa<VPDerivedIVRecipe>(C->getOperand(0)) || 1706 isa<VPCanonicalIVPHIRecipe>(C->getOperand(0)); 1707 } 1708 1709 Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) { 1710 assert(vputils::onlyFirstLaneUsed(this) && 1711 "Codegen only implemented for first lane."); 1712 switch (Opcode) { 1713 case Instruction::SExt: 1714 case Instruction::ZExt: 1715 case Instruction::Trunc: { 1716 // Note: SExt/ZExt not used yet. 1717 Value *Op = State.get(getOperand(0), VPIteration(Part, 0)); 1718 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy); 1719 } 1720 default: 1721 llvm_unreachable("opcode not implemented yet"); 1722 } 1723 } 1724 1725 void VPScalarCastRecipe ::execute(VPTransformState &State) { 1726 bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this); 1727 for (unsigned Part = 0; Part != State.UF; ++Part) { 1728 Value *Res; 1729 // Only generate a single instance, if the recipe is uniform across UFs and 1730 // VFs. 1731 if (Part > 0 && IsUniformAcrossVFsAndUFs) 1732 Res = State.get(this, VPIteration(0, 0)); 1733 else 1734 Res = generate(State, Part); 1735 State.set(this, Res, VPIteration(Part, 0)); 1736 } 1737 } 1738 1739 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1740 void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent, 1741 VPSlotTracker &SlotTracker) const { 1742 O << Indent << "SCALAR-CAST "; 1743 printAsOperand(O, SlotTracker); 1744 O << " = " << Instruction::getOpcodeName(Opcode) << " "; 1745 printOperands(O, SlotTracker); 1746 O << " to " << *ResultTy; 1747 } 1748 #endif 1749 1750 void VPBranchOnMaskRecipe::execute(VPTransformState &State) { 1751 assert(State.Instance && "Branch on Mask works only on single instance."); 1752 1753 unsigned Part = State.Instance->Part; 1754 unsigned Lane = State.Instance->Lane.getKnownLane(); 1755 1756 Value *ConditionBit = nullptr; 1757 VPValue *BlockInMask = getMask(); 1758 if (BlockInMask) { 1759 ConditionBit = State.get(BlockInMask, Part); 1760 if (ConditionBit->getType()->isVectorTy()) 1761 ConditionBit = State.Builder.CreateExtractElement( 1762 ConditionBit, State.Builder.getInt32(Lane)); 1763 } else // Block in mask is all-one. 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPScalarCastRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "SCALAR-CAST ";
  printAsOperand(O, SlotTracker);
  O << " = " << Instruction::getOpcodeName(Opcode) << " ";
  printOperands(O, SlotTracker);
  O << " to " << *ResultTy;
}
#endif

void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Branch on Mask works only on single instance.");

  unsigned Part = State.Instance->Part;
  unsigned Lane = State.Instance->Lane.getKnownLane();

  Value *ConditionBit = nullptr;
  VPValue *BlockInMask = getMask();
  if (BlockInMask) {
    ConditionBit = State.get(BlockInMask, Part);
    if (ConditionBit->getType()->isVectorTy())
      ConditionBit = State.Builder.CreateExtractElement(
          ConditionBit, State.Builder.getInt32(Lane));
  } else // Block in mask is all-one.
    ConditionBit = State.Builder.getTrue();

  // Replace the temporary unreachable terminator with a new conditional
  // branch, whose two destinations will be set later when they are created.
  auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
  assert(isa<UnreachableInst>(CurrentTerminator) &&
         "Expected to replace unreachable terminator with conditional "
         "branch.");
  auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
  CondBr->setSuccessor(0, nullptr);
  ReplaceInstWithInst(CurrentTerminator, CondBr);
}
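// Illustration only: per replicated lane this emits roughly
//   %bit = extractelement <4 x i1> %mask, i32 <Lane>
//   br i1 %bit, label %pred.if, label %pred.continue
// (block names illustrative), with both branch destinations patched in once
// those blocks exist.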
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Predicated instruction PHI works per instance.");
  Instruction *ScalarPredInst =
      cast<Instruction>(State.get(getOperand(0), *State.Instance));
  BasicBlock *PredicatedBB = ScalarPredInst->getParent();
  BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
  assert(PredicatingBB && "Predicated block has no single predecessor.");
  assert(isa<VPReplicateRecipe>(getOperand(0)) &&
         "operand must be VPReplicateRecipe");

  // By current pack/unpack logic we need to generate only a single phi node:
  // if a vector value for the predicated instruction exists at this point it
  // means the instruction has vector users only, and a phi for the vector
  // value is needed. In this case the recipe of the predicated instruction is
  // marked to also do that packing, thereby "hoisting" the insert-element
  // sequence. Otherwise, a phi node for the scalar value is needed.
  unsigned Part = State.Instance->Part;
  if (State.hasVectorValue(getOperand(0), Part)) {
    Value *VectorValue = State.get(getOperand(0), Part);
    InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
    PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
    VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
    VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
    if (State.hasVectorValue(this, Part))
      State.reset(this, VPhi, Part);
    else
      State.set(this, VPhi, Part);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), VPhi, Part);
  } else {
    Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
    PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
    Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
                     PredicatingBB);
    Phi->addIncoming(ScalarPredInst, PredicatedBB);
    if (State.hasScalarValue(this, *State.Instance))
      State.reset(this, Phi, *State.Instance);
    else
      State.set(this, Phi, *State.Instance);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), Phi, *State.Instance);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  printOperands(O, SlotTracker);
}

void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = load ";
  printOperands(O, SlotTracker);
}

void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = vp.load ";
  printOperands(O, SlotTracker);
}

void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN store ";
  printOperands(O, SlotTracker);
}

void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN vp.store ";
  printOperands(O, SlotTracker);
}
#endif

void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
  Value *Start = getStartValue()->getLiveInIRValue();
  PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
  EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  EntryPart->addIncoming(Start, VectorPH);
  EntryPart->setDebugLoc(getDebugLoc());
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, EntryPart, Part, /*IsScalar*/ true);
}
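// Illustration only: for a typical plan starting at 0, the canonical IV phi
// produced above looks roughly like
//   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
// where the backedge value is wired up later by the recipe that computes
// %index.next.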
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
#endif

bool VPCanonicalIVPHIRecipe::isCanonical(
    InductionDescriptor::InductionKind Kind, VPValue *Start,
    VPValue *Step) const {
  // Must be an integer induction.
  if (Kind != InductionDescriptor::IK_IntInduction)
    return false;
  // Start must match the start value of this canonical induction.
  if (Start != getStartValue())
    return false;

  // If the step is defined by a recipe, it is not a ConstantInt.
  if (Step->getDefiningRecipe())
    return false;

  ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
  return StepC && StepC->isOne();
}

bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
  return IsScalarAfterVectorization &&
         (!IsScalable || vputils::onlyFirstLaneUsed(this));
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-POINTER-INDUCTION ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << ", " << *IndDesc.getStep();
}
#endif

void VPExpandSCEVRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "cannot be used in per-lane");
  const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
  SCEVExpander Exp(SE, DL, "induction");

  Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
                                 &*State.Builder.GetInsertPoint());
  assert(!State.ExpandedSCEVs.contains(Expr) &&
         "Same SCEV expanded multiple times");
  State.ExpandedSCEVs[Expr] = Res;
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, Res, {Part, 0});
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  getVPSingleValue()->printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV " << *Expr;
}
#endif

void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
  Type *STy = CanonicalIV->getType();
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
  ElementCount VF = State.VF;
  Value *VStart = VF.isScalar()
                      ? CanonicalIV
                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    Value *VStep = createStepForVF(Builder, STy, VF, Part);
    if (VF.isVector()) {
      VStep = Builder.CreateVectorSplat(VF, VStep);
      VStep =
          Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
    }
    Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
    State.set(this, CanonicalVectorIV, Part);
  }
}
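// Illustration only: for fixed VF=4 and Part=0, the widened canonical IV
// above is roughly
//   %broadcast = ... splat of %index ...
//   %vec.iv    = add <4 x i64> %broadcast, <i64 0, i64 1, i64 2, i64 3>
// i.e. the splat of the scalar IV plus the per-part step vector.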
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
#endif

void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  if (State.VF.isVector()) {
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(VectorPH->getTerminator());
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }

  // Create a phi node for the new recurrence.
  PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
  EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(VectorInit, VectorPH);
  State.set(this, EntryPart, 0);
}
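// Illustration only: assuming an i32 recurrence and fixed VF=4, the preheader
// ends up with roughly
//   %vector.recur.init = insertelement <4 x i32> poison, i32 %init, i32 3
// placing the scalar initial value in the last lane, where the recurrence's
// previous value is expected.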
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPReductionPHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;

  // Reductions do not have to start at zero. They can start with
  // any loop-invariant values.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
  // this value when we vectorize all of the instructions that use the PHI.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy = ScalarPHI ? StartV->getType()
                          : VectorType::get(StartV->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
    EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part, IsInLoop);
  }

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
    // MinMax and AnyOf reductions have the start value as their identity.
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }

  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part, IsInLoop);
    // Make sure to add the reduction start value only to the
    // first unroll part.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
  }
}
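// Illustration only: for an unpredicated integer add reduction with fixed
// VF=4, the preheader feeds the first part's phi with roughly
//   %start = insertelement <4 x i32> zeroinitializer, i32 %rdx.start, i32 0
// while the remaining parts start from the plain identity vector.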
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-REDUCTION-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPWidenPHIRecipe::execute(VPTransformState &State) {
  assert(EnableVPlanNativePath &&
         "Non-native vplans are not expected to have VPWidenPHIRecipes.");

  Value *Op0 = State.get(getOperand(0), 0);
  Type *VecTy = Op0->getType();
  Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
  State.set(this, VecPhi, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-PHI ";

  auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
  // Unless all incoming values are modeled in VPlan, print the original PHI
  // directly.
  // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant
  // incoming values as VPValues.
  if (getNumOperands() != OriginalPhi->getNumOperands()) {
    O << VPlanIngredient(OriginalPhi);
    return;
  }

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
// remove VPActiveLaneMaskPHIRecipe.
void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    Value *StartMask = State.get(getOperand(0), Part);
    PHINode *EntryPart =
        State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
    EntryPart->addIncoming(StartMask, VectorPH);
    EntryPart->setDebugLoc(getDebugLoc());
    State.set(this, EntryPart, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                      VPSlotTracker &SlotTracker) const {
  O << Indent << "ACTIVE-LANE-MASK-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) {
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization.");
  Value *Start = State.get(getOperand(0), VPIteration(0, 0));
  PHINode *EntryPart =
      State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
  EntryPart->addIncoming(Start, VectorPH);
  EntryPart->setDebugLoc(getDebugLoc());
  State.set(this, EntryPart, 0, /*IsScalar=*/true);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif
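// Illustration only: with VP vectorization (UF=1) and a zero start value, the
// phi above looks roughly like
//   %evl.based.iv = phi i64 [ 0, %vector.ph ], [ %iv.next, %vector.body ]
// where the backedge value, advanced by the explicit vector length, is added
// later by the recipe that increments the IV (names illustrative).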