//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementations for different VPlan recipes.
///
//===----------------------------------------------------------------------===//

#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>

using namespace llvm;

using VectorParts = SmallVector<Value *, 2>;

namespace llvm {
extern cl::opt<bool> EnableVPlanNativePath;
}

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPInterleaveSC:
    return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return true;
  case VPReplicateSC:
  case VPWidenCallSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPBranchOnMaskSC:
  case VPScalarIVStepsSC:
  case VPPredInstPHISC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
    return true;
  case VPReplicateSC:
  case VPWidenCallSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPBranchOnMaskSC:
  case VPPredInstPHISC:
  case VPScalarIVStepsSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPDerivedIVSC:
  case VPPredInstPHISC:
  case VPScalarCastSC:
    return false;
  case VPInstructionSC:
    switch (cast<VPInstruction>(this)->getOpcode()) {
    case Instruction::Or:
    case Instruction::ICmp:
    case Instruction::Select:
    case VPInstruction::Not:
    case VPInstruction::CalculateTripCountMinusVF:
    case VPInstruction::CanonicalIVIncrementForPart:
    case VPInstruction::PtrAdd:
      return false;
    default:
      return true;
    }
  case VPWidenCallSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayHaveSideEffects();
  case VPBlendSC:
  case VPReductionSC:
  case VPScalarIVStepsSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenPointerInductionSC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPInterleaveSC:
    return mayWriteToMemory();
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    assert(
        cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
            mayWriteToMemory() &&
        "mayHaveSideEffects result for ingredient differs from this "
        "implementation");
    return mayWriteToMemory();
  case VPReplicateSC: {
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    return true;
  }
}

void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
  auto Lane = VPLane::getLastLaneForVF(State.VF);
  VPValue *ExitValue = getOperand(0);
  if (vputils::isUniformAfterVectorization(ExitValue))
    Lane = VPLane::getFirstLane();
  VPBasicBlock *MiddleVPBB =
      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
  assert(MiddleVPBB->getNumSuccessors() == 0 &&
         "the middle block must not have any successors");
  BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
  Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
                   MiddleBB);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
  O << "Live-out ";
  getPhi()->printAsOperand(O);
  O << " = ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << "\n";
}
#endif

void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, InsertPos->getIterator());
}

void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);
  BB.insert(this, I);
}

void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
}

void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
}

iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}

void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}

void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}

FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
  assert(OpType == OperationType::FPMathOp &&
         "recipe doesn't have fast math flags");
  FastMathFlags Res;
  Res.setAllowReassoc(FMFs.AllowReassoc);
  Res.setNoNaNs(FMFs.NoNaNs);
  Res.setNoInfs(FMFs.NoInfs);
  Res.setNoSignedZeros(FMFs.NoSignedZeros);
  Res.setAllowReciprocal(FMFs.AllowReciprocal);
  Res.setAllowContract(FMFs.AllowContract);
  Res.setApproxFunc(FMFs.ApproxFunc);
  return Res;
}

VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
                             VPValue *A, VPValue *B, DebugLoc DL,
                             const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
                          Pred, DL),
      Opcode(Opcode), Name(Name.str()) {
  assert(Opcode == Instruction::ICmp &&
         "only ICmp predicates supported at the moment");
}

VPInstruction::VPInstruction(unsigned Opcode,
                             std::initializer_list<VPValue *> Operands,
                             FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
      Opcode(Opcode), Name(Name.str()) {
  // Make sure the VPInstruction is a floating-point operation.
  assert(isFPMathOp() && "this op can't take fast-math flags");
}

bool VPInstruction::doesGeneratePerAllLanes() const {
  return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
}

bool VPInstruction::canGenerateScalarForFirstLane() const {
  if (Instruction::isBinaryOp(getOpcode()))
    return true;

  switch (Opcode) {
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::ComputeReductionResult:
  case VPInstruction::PtrAdd:
  case VPInstruction::ExplicitVectorLength:
    return true;
  default:
    return false;
  }
}

Value *VPInstruction::generatePerLane(VPTransformState &State,
                                      const VPIteration &Lane) {
  IRBuilderBase &Builder = State.Builder;

  assert(getOpcode() == VPInstruction::PtrAdd &&
         "only PtrAdd opcodes are supported for now");
  return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
                              State.get(getOperand(1), Lane), Name);
}

Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
  IRBuilderBase &Builder = State.Builder;

  if (Instruction::isBinaryOp(getOpcode())) {
    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
    if (Part != 0 && vputils::onlyFirstPartUsed(this))
      return State.get(this, 0, OnlyFirstLaneUsed);

    Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
    Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
    auto *Res =
        Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
    if (auto *I = dyn_cast<Instruction>(Res))
      setFlags(I);
    return Res;
  }

  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0), Part);
    return Builder.CreateNot(A, Name);
  }
  case Instruction::ICmp: {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    return Builder.CreateCmp(getPredicate(), A, B, Name);
  }
  case Instruction::Select: {
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    return Builder.CreateSelect(Cond, Op1, Op2, Name);
  }
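  // Illustrative semantics for the active-lane-mask case below (example, not
  // from the source): with VF = 4, induction value I and trip count N, the
  // emitted mask is the <4 x i1> vector
  //   <I+0 < N, I+1 < N, I+2 < N, I+3 < N>,
  // enabling exactly the in-bounds lanes.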
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop tripcount.
    Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));

    // If this part of the active lane mask is scalar, generate the CMP
    // directly to avoid unnecessary extracts.
    if (State.VF.isScalar())
      return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0,
                               ScalarTC, Name);

    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
                                   {PredTy, ScalarTC->getType()},
                                   {VIVElem0, ScalarTC}, nullptr, Name);
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))

    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy())
      return PartMinus1;
    Value *V2 = State.get(getOperand(1), Part);
    return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
  }
  case VPInstruction::CalculateTripCountMinusVF: {
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    Value *ScalarTC = State.get(getOperand(0), {0, 0});
    Value *Step =
        createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
    Value *Sub = Builder.CreateSub(ScalarTC, Step);
    Value *Cmp =
        Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
    Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
    return Builder.CreateSelect(Cmp, Sub, Zero);
  }
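  // The next case lowers to the llvm.experimental.get.vector.length
  // intrinsic. As a hedged sketch: for remaining trip count AVL and a
  // scalable VF, the returned EVL never exceeds min(AVL, VScale * VF), so
  // the tail is handled by shrinking the effective vector length instead of
  // running a scalar epilogue loop.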
  case VPInstruction::ExplicitVectorLength: {
    // Compute EVL
    auto GetEVL = [=](VPTransformState &State, Value *AVL) {
      assert(AVL->getType()->isIntegerTy() &&
             "Requested vector length should be an integer.");

      // TODO: Add support for MaxSafeDist for correct loop emission.
      assert(State.VF.isScalable() && "Expected scalable vector factor.");
      Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());

      Value *EVL = State.Builder.CreateIntrinsic(
          State.Builder.getInt32Ty(),
          Intrinsic::experimental_get_vector_length,
          {AVL, VFArg, State.Builder.getTrue()});
      return EVL;
    };
    // TODO: Restructure this code with an explicit remainder loop, vsetvli
    // can be outside of the main loop.
    assert(Part == 0 && "No unrolling expected for predicated vectorization.");
    // Compute VTC - IV as the AVL (requested vector length).
    Value *Index = State.get(getOperand(0), VPIteration(0, 0));
    Value *TripCount = State.get(getOperand(1), VPIteration(0, 0));
    Value *AVL = State.Builder.CreateSub(TripCount, Index);
    Value *EVL = GetEVL(State, AVL);
    return EVL;
  }
  case VPInstruction::CanonicalIVIncrementForPart: {
    auto *IV = State.get(getOperand(0), VPIteration(0, 0));
    if (Part == 0)
      return IV;

    // The canonical IV is incremented by the vectorization factor (num of
    // SIMD elements) times the unroll part.
    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
                             hasNoSignedWrap());
  }
  case VPInstruction::BranchOnCond: {
    if (Part != 0)
      return nullptr;

    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination for exiting blocks now
    // and to forward destination(s) later when they are created.
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);

    if (getParent()->isExiting())
      CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);

    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
  case VPInstruction::BranchOnCount: {
    if (Part != 0)
      return nullptr;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
    Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination (the header) now and to
    // the forward destination (the exit/middle block) later when it is
    // created. Note that CreateCondBr expects a valid BB as first argument,
    // so we need to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
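  // A sketch of the next case (illustrative, not from the source): for an
  // unordered add reduction with UF = 2, the two per-part vectors are first
  // combined with a "bin.rdx" add, and the resulting vector is folded to a
  // scalar by a single vector.reduce.add after the loop.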
  case VPInstruction::ComputeReductionResult: {
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
    // and will be removed by breaking up the recipe further.
    auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
    auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
    // Get its reduction variable descriptor.
    const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();

    RecurKind RK = RdxDesc.getRecurrenceKind();

    VPValue *LoopExitingDef = getOperand(1);
    Type *PhiTy = OrigPhi->getType();
    VectorParts RdxParts(State.UF);
    for (unsigned Part = 0; Part < State.UF; ++Part)
      RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());

    // If the vector reduction can be performed in a smaller type, we truncate
    // then extend the loop exit value to enable InstCombine to evaluate the
    // entire expression in the smaller type.
    // TODO: Handle this in truncateToMinBW.
    if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
      Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
      for (unsigned Part = 0; Part < State.UF; ++Part)
        RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
    }
    // Reduce all of the unrolled parts into a single vector.
    Value *ReducedPartRdx = RdxParts[0];
    unsigned Op = RecurrenceDescriptor::getOpcode(RK);

    if (PhiR->isOrdered()) {
      ReducedPartRdx = RdxParts[State.UF - 1];
    } else {
      // Floating-point operations should have some FMF to enable the
      // reduction.
      IRBuilderBase::FastMathFlagGuard FMFG(Builder);
      Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
      for (unsigned Part = 1; Part < State.UF; ++Part) {
        Value *RdxPart = RdxParts[Part];
        if (Op != Instruction::ICmp && Op != Instruction::FCmp)
          ReducedPartRdx = Builder.CreateBinOp(
              (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
        else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
          TrackingVH<Value> ReductionStartValue =
              RdxDesc.getRecurrenceStartValue();
          ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK,
                                         ReducedPartRdx, RdxPart);
        } else
          ReducedPartRdx =
              createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
      }
    }

    // Create the reduction after the loop. Note that inloop reductions create
    // the target reduction in the loop using a Reduction recipe.
    if (State.VF.isVector() && !PhiR->isInLoop()) {
      ReducedPartRdx =
          createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
      // If the reduction can be performed in a smaller type, we need to
      // extend the reduction to the wider type before we branch to the
      // original loop.
      if (PhiTy != RdxDesc.getRecurrenceType())
        ReducedPartRdx = RdxDesc.isSigned()
                             ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
                             : Builder.CreateZExt(ReducedPartRdx, PhiTy);
    }

    // If there were stores of the reduction value to a uniform memory address
    // inside the loop, create the final store here.
    if (StoreInst *SI = RdxDesc.IntermediateStore) {
      auto *NewSI = Builder.CreateAlignedStore(
          ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
      propagateMetadata(NewSI, SI);
    }

    return ReducedPartRdx;
  }
  case VPInstruction::PtrAdd: {
    assert(vputils::onlyFirstLaneUsed(this) &&
           "can only generate first lane for PtrAdd");
    Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
    Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
    return Builder.CreatePtrAdd(Ptr, Addend, Name);
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}

#if !defined(NDEBUG)
bool VPInstruction::isFPMathOp() const {
  // Inspired by FPMathOperator::classof. Notable differences are that we
  // don't support the Call and PHI opcodes here yet, while Select is already
  // included.
  return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
         Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
         Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
         Opcode == Instruction::FCmp || Opcode == Instruction::Select;
}
#endif

void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Instance && "VPInstruction executing an Instance");
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  assert((hasFastMathFlags() == isFPMathOp() ||
          getOpcode() == Instruction::Select) &&
         "Recipe not an FPMathOp but has fast-math flags?");
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());
  State.setDebugLocFrom(getDebugLoc());
  bool GeneratesPerFirstLaneOnly =
      canGenerateScalarForFirstLane() &&
      (vputils::onlyFirstLaneUsed(this) ||
       getOpcode() == VPInstruction::ComputeReductionResult);
  bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    if (GeneratesPerAllLanes) {
      for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
           Lane != NumLanes; ++Lane) {
        Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
        assert(GeneratedValue && "generatePerLane must produce a value");
        State.set(this, GeneratedValue, VPIteration(Part, Lane));
      }
      continue;
    }

    Value *GeneratedValue = generatePerPart(State, Part);
    if (!hasResult())
      continue;
    assert(GeneratedValue && "generatePerPart must produce a value");
    assert((GeneratedValue->getType()->isVectorTy() ==
                !GeneratesPerFirstLaneOnly ||
            State.VF.isScalar()) &&
           "scalar value but not only first lane defined");
    State.set(this, GeneratedValue, Part,
              /*IsScalar*/ GeneratesPerFirstLaneOnly);
  }
}

bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  if (Instruction::isBinaryOp(getOpcode()))
    return vputils::onlyFirstLaneUsed(this);

  switch (getOpcode()) {
  default:
    return false;
  case Instruction::ICmp:
  case VPInstruction::PtrAdd:
    // TODO: Cover additional opcodes.
    return vputils::onlyFirstLaneUsed(this);
  case VPInstruction::ActiveLaneMask:
  case VPInstruction::ExplicitVectorLength:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::BranchOnCount:
    return true;
  }
  llvm_unreachable("switch should return");
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
  print(dbgs(), "", SlotTracker);
}

void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::ExplicitVectorLength:
    O << "EXPLICIT-VECTOR-LENGTH";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::CalculateTripCountMinusVF:
    O << "TC > VF ? TC - VF : 0";
    break;
  case VPInstruction::CanonicalIVIncrementForPart:
    O << "VF * Part +";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count";
    break;
  case VPInstruction::ComputeReductionResult:
    O << "compute-reduction-result";
    break;
  case VPInstruction::PtrAdd:
    O << "ptradd";
    break;
  default:
    O << Instruction::getOpcodeName(getOpcode());
  }

  printFlags(O);
  printOperands(O, SlotTracker);

  if (auto DL = getDebugLoc()) {
    O << ", !dbg ";
    DL.print(O);
  }
}
#endif

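// Illustrative example for the lowering below (not from the source): with
// VF = 4, a scalar call to float @llvm.sqrt.f32(float %x) is widened either
// to the vector intrinsic <4 x float> @llvm.sqrt.v4f32(<4 x float>) or, if a
// library vector variant was selected, to a call of that variant with the
// widened (and, for scalar/linear parameters, per-part scalar) arguments.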
void VPWidenCallRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  auto &CI = *cast<CallInst>(getUnderlyingInstr());
  assert(!isa<DbgInfoIntrinsic>(CI) &&
         "DbgInfoIntrinsic should have been dropped during VPlan construction");
  State.setDebugLocFrom(getDebugLoc());

  bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
  FunctionType *VFTy = nullptr;
  if (Variant)
    VFTy = Variant->getFunctionType();
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    SmallVector<Type *, 2> TysForDecl;
    // Add return type if intrinsic is overloaded on it.
    if (UseIntrinsic &&
        isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
      TysForDecl.push_back(
          VectorType::get(CI.getType()->getScalarType(), State.VF));
    SmallVector<Value *, 4> Args;
    for (const auto &I : enumerate(operands())) {
      // Some intrinsics have a scalar argument - don't replace it with a
      // vector.
      Value *Arg;
      if (UseIntrinsic &&
          isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
        Arg = State.get(I.value(), VPIteration(0, 0));
      // Some vectorized function variants may also take a scalar argument,
      // e.g. linear parameters for pointers. This needs to be the scalar
      // value from the start of the respective part when interleaving.
      else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
        Arg = State.get(I.value(), VPIteration(Part, 0));
      else
        Arg = State.get(I.value(), Part);
      if (UseIntrinsic &&
          isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
        TysForDecl.push_back(Arg->getType());
      Args.push_back(Arg);
    }

    Function *VectorF;
    if (UseIntrinsic) {
      // Use vector version of the intrinsic.
      Module *M = State.Builder.GetInsertBlock()->getModule();
      VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
      assert(VectorF && "Can't retrieve vector intrinsic.");
    } else {
      assert(Variant != nullptr && "Can't create vector function.");
      VectorF = Variant;
    }

    SmallVector<OperandBundleDef, 1> OpBundles;
    CI.getOperandBundlesAsDefs(OpBundles);
    CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);

    if (isa<FPMathOperator>(V))
      V->copyFastMathFlags(&CI);

    if (!V->getType()->isVoidTy())
      State.set(this, V, Part);
    State.addMetadata(V, &CI);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  auto *CI = cast<CallInst>(getUnderlyingInstr());
  if (CI->getType()->isVoidTy())
    O << "void ";
  else {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  O << "call @" << CI->getCalledFunction()->getName() << "(";
  printOperands(O, SlotTracker);
  O << ")";

  if (VectorIntrinsicID)
    O << " (using vector intrinsic)";
  else {
    O << " (using library function";
    if (Variant->hasName())
      O << ": " << Variant->getName();
    O << ")";
  }
}

void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-SELECT ";
  printAsOperand(O, SlotTracker);
  O << " = select ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(2)->printAsOperand(O, SlotTracker);
  O << (isInvariantCond() ? " (condition is loop invariant)" : "");
}
#endif

void VPWidenSelectRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());

  // The condition can be loop invariant but still defined inside the
  // loop. This means that we can't just use the original 'cond' value.
  // We have to take the 'vectorized' value and pick the first lane.
  // Instcombine will make this a no-op.
  auto *InvarCond =
      isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
    Value *Op0 = State.get(getOperand(1), Part);
    Value *Op1 = State.get(getOperand(2), Part);
    Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
    State.set(this, Sel, Part);
    State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
  }
}

VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
    const FastMathFlags &FMF) {
  AllowReassoc = FMF.allowReassoc();
  NoNaNs = FMF.noNaNs();
  NoInfs = FMF.noInfs();
  NoSignedZeros = FMF.noSignedZeros();
  AllowReciprocal = FMF.allowReciprocal();
  AllowContract = FMF.allowContract();
  ApproxFunc = FMF.approxFunc();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
  switch (OpType) {
  case OperationType::Cmp:
    O << " " << CmpInst::getPredicateName(getPredicate());
    break;
  case OperationType::DisjointOp:
    if (DisjointFlags.IsDisjoint)
      O << " disjoint";
    break;
  case OperationType::PossiblyExactOp:
    if (ExactFlags.IsExact)
      O << " exact";
    break;
  case OperationType::OverflowingBinOp:
    if (WrapFlags.HasNUW)
      O << " nuw";
    if (WrapFlags.HasNSW)
      O << " nsw";
    break;
  case OperationType::FPMathOp:
    getFastMathFlags().print(O);
    break;
  case OperationType::GEPOp:
    if (GEPFlags.IsInBounds)
      O << " inbounds";
    break;
  case OperationType::NonNegOp:
    if (NonNegFlags.NonNeg)
      O << " nneg";
    break;
  case OperationType::Other:
    break;
  }
  if (getNumOperands() > 0)
    O << " ";
}
#endif

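// Illustrative example (not from the source): with VF = 4 and UF = 1, a
// scalar 'add nsw i32 %a, %b' handled by the recipe below becomes a single
// 'add nsw <4 x i32>' on the widened operands; IR flags such as nsw/nuw are
// re-applied to the widened instruction via setFlags.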
void VPWidenRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  switch (Opcode) {
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Just widen unops and binops.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      SmallVector<Value *, 2> Ops;
      for (VPValue *VPOp : operands())
        Ops.push_back(State.get(VPOp, Part));

      Value *V = Builder.CreateNAryOp(Opcode, Ops);

      if (auto *VecOp = dyn_cast<Instruction>(V))
        setFlags(VecOp);

      // Use this vector value for all users of the original instruction.
      State.set(this, V, Part);
      State.addMetadata(V,
                        dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  case Instruction::Freeze: {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *Op = State.get(getOperand(0), Part);

      Value *Freeze = Builder.CreateFreeze(Op);
      State.set(this, Freeze, Part);
    }
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Widen compares. Generate vector compares.
    bool FCmp = Opcode == Instruction::FCmp;
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *B = State.get(getOperand(1), Part);
      Value *C = nullptr;
      if (FCmp) {
        // Propagate fast math flags.
        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
        if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
          Builder.setFastMathFlags(I->getFastMathFlags());
        C = Builder.CreateFCmp(getPredicate(), A, B);
      } else {
        C = Builder.CreateICmp(getPredicate(), A, B);
      }
      State.set(this, C, Part);
      State.addMetadata(C,
                        dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  default:
    // This instruction is not vectorized by simple widening.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
                      << Instruction::getOpcodeName(Opcode));
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.

#if !defined(NDEBUG)
  // Verify that VPlan type inference results agree with the type of the
  // generated values.
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    assert(VectorType::get(State.TypeAnalysis.inferScalarType(this),
                           State.VF) == State.get(this, Part)->getType() &&
           "inferred type and type from generated instructions do not match");
  }
#endif
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = " << Instruction::getOpcodeName(Opcode);
  printFlags(O);
  printOperands(O, SlotTracker);
}
#endif

void VPWidenCastRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  // Vectorize casts.
  assert(State.VF.isVector() && "Not vectorizing?");
  Type *DestTy = VectorType::get(getResultType(), State.VF);
  VPValue *Op = getOperand(0);
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    if (Part > 0 && Op->isLiveIn()) {
      // FIXME: Remove once explicit unrolling is implemented using VPlan.
      State.set(this, State.get(this, 0), Part);
      continue;
    }
    Value *A = State.get(Op, Part);
    Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
    State.set(this, Cast, Part);
    State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CAST ";
  printAsOperand(O, SlotTracker);
  O << " = " << Instruction::getOpcodeName(Opcode) << " ";
  printFlags(O);
  printOperands(O, SlotTracker);
  O << " to " << *getResultType();
}
#endif

/// This function adds
/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
/// to each vector element of Val. The sequence starts at \p StartIdx.
/// \p BinOp is relevant for FP induction variables.
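/// For example (illustrative): with VF = 4, StartIdx = 0, an integer step s
/// and Val = <v, v, v, v>, the result is <v, v + s, v + 2*s, v + 3*s>.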
static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
                            Instruction::BinaryOps BinOp, ElementCount VF,
                            IRBuilderBase &Builder) {
  assert(VF.isVector() && "only vector VFs are supported");

  // Create and check the types.
  auto *ValVTy = cast<VectorType>(Val->getType());
  ElementCount VLen = ValVTy->getElementCount();

  Type *STy = Val->getType()->getScalarType();
  assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
         "Induction Step must be an integer or FP");
  assert(Step->getType() == STy && "Step has wrong type");

  // Create a vector of consecutive numbers from zero to VF.
  VectorType *InitVecValVTy = ValVTy;
  if (STy->isFloatingPointTy()) {
    Type *InitVecValSTy =
        IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
    InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
  }
  Value *InitVec = Builder.CreateStepVector(InitVecValVTy);

  // Splat the StartIdx.
  Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);

  if (STy->isIntegerTy()) {
    InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
    Step = Builder.CreateVectorSplat(VLen, Step);
    assert(Step->getType() == Val->getType() && "Invalid step vec");
    // FIXME: The newly created binary instructions should contain nsw/nuw
    // flags, which can be found from the original scalar operations.
    Step = Builder.CreateMul(InitVec, Step);
    return Builder.CreateAdd(Val, Step, "induction");
  }

  // Floating-point induction.
  assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
         "Binary Opcode should be specified for FP induction");
  InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
  InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);

  Step = Builder.CreateVectorSplat(VLen, Step);
  Value *MulOp = Builder.CreateFMul(InitVec, Step);
  return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
}

/// A helper function that returns an integer or floating-point constant with
/// value C.
static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
  return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
                           : ConstantFP::get(Ty, C);
}

static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,
                                  ElementCount VF) {
  assert(FTy->isFloatingPointTy() && "Expected floating point type!");
  Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
  Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
  return B.CreateUIToFP(RuntimeVF, FTy);
}

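// A sketch of the IR emitted below (illustrative, assuming VF = 4, UF = 2 and
// an integer induction with start X and step S):
//   vector.ph:
//     %stepped.start = <X, X+S, X+2*S, X+3*S>
//   vector.body:
//     %vec.ind = phi [ %stepped.start, %vector.ph ], [ %vec.ind.next, ... ]
//     %step.add = add %vec.ind, splat(4*S)       ; value for part 1
//     %vec.ind.next = add %step.add, splat(4*S)  ; feeds the phi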
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Int or FP induction being replicated.");

  Value *Start = getStartValue()->getLiveInIRValue();
  const InductionDescriptor &ID = getInductionDescriptor();
  TruncInst *Trunc = getTruncInst();
  IRBuilderBase &Builder = State.Builder;
  assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
  assert(State.VF.isVector() && "must have vector VF");

  // The value from the original loop to which we are mapping the new
  // induction variable.
  Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;

  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
    Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());

  // Now do the actual transformations, and start with fetching the step
  // value.
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));

  assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
         "Expected either an induction phi-node or a truncate of it!");

  // Construct the initial value of the vector IV in the vector loop
  // preheader.
  auto CurrIP = Builder.saveIP();
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  Builder.SetInsertPoint(VectorPH->getTerminator());
  if (isa<TruncInst>(EntryVal)) {
    assert(Start->getType()->isIntegerTy() &&
           "Truncation requires an integer type");
    auto *TruncType = cast<IntegerType>(EntryVal->getType());
    Step = Builder.CreateTrunc(Step, TruncType);
    Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
  }

  Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
  Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
  Value *SteppedStart = getStepVector(
      SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);

  // We create vector phi nodes for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (Step->getType()->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = ID.getInductionOpcode();
    MulOp = Instruction::FMul;
  }

  // Multiply the vectorization factor by the step using integer or
  // floating-point arithmetic as appropriate.
  Type *StepType = Step->getType();
  Value *RuntimeVF;
  if (Step->getType()->isFloatingPointTy())
    RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
  else
    RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
  Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);

  // Create a vector splat to use in the induction update.
  //
  // FIXME: If the step is non-constant, we create the vector splat with
  //        IRBuilder. IRBuilder can constant-fold the multiply, but it
  //        doesn't handle a constant vector splat.
  Value *SplatVF = isa<Constant>(Mul)
                       ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
                       : Builder.CreateVectorSplat(State.VF, Mul);
  Builder.restoreIP(CurrIP);

  // We may need to add the step a number of times, depending on the unroll
  // factor. The last of those goes into the PHI.
  PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
  VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  VecInd->setDebugLoc(EntryVal->getDebugLoc());
  Instruction *LastInduction = VecInd;
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    State.set(this, LastInduction, Part);

    if (isa<TruncInst>(EntryVal))
      State.addMetadata(LastInduction, EntryVal);

    LastInduction = cast<Instruction>(
        Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
    LastInduction->setDebugLoc(EntryVal->getDebugLoc());
  }

  LastInduction->setName("vec.ind.next");
  VecInd->addIncoming(SteppedStart, VectorPH);
  // Add induction update using an incorrect block temporarily. The phi node
  // will be fixed after VPlan execution. Note that at this point the latch
  // block cannot be used, as it does not exist yet.
  // TODO: Model increment value in VPlan, by turning the recipe into a
  // multi-def and a subclass of VPHeaderPHIRecipe.
  VecInd->addIncoming(LastInduction, VectorPH);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-INDUCTION";
  if (getTruncInst()) {
    O << "\\l\"";
    O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
    O << " +\n" << Indent << "\" ";
    getVPValue(0)->printAsOperand(O, SlotTracker);
  } else
    O << " " << VPlanIngredient(IV);

  O << ", ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
#endif

bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
  // The step may be defined by a recipe in the preheader (e.g. if it requires
  // SCEV expansion), but for the canonical induction the step is required to
  // be 1, which is represented as a live-in.
  if (getStepValue()->getDefiningRecipe())
    return false;
  auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
  auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
  return StartC && StartC->isZero() && StepC && StepC->isOne();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = DERIVED-IV ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << " + ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << " * ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
#endif

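// Illustrative example (not from the source): with VF = 4, UF = 1, base IV B
// and step S, and all lanes used, the recipe below emits the four scalars
// B + 0*S, B + 1*S, B + 2*S, B + 3*S; if only the first lane is demanded,
// just B + 0*S is generated.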
void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());

  // Compute scalar induction steps. BaseIV is the scalar induction variable
  // on which to base the steps, Step is the size of the step.
  Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));
  IRBuilderBase &Builder = State.Builder;

  // Ensure step has the same type as that of scalar IV.
  Type *BaseIVTy = BaseIV->getType()->getScalarType();
  assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");

  // We build scalar steps for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (BaseIVTy->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = InductionOpcode;
    MulOp = Instruction::FMul;
  }

  // Determine the number of scalars we need to generate for each unroll
  // iteration.
  bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
  // Compute the scalar steps and save the results in State.
  Type *IntStepTy =
      IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
  Type *VecIVTy = nullptr;
  Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
  if (!FirstLaneOnly && State.VF.isScalable()) {
    VecIVTy = VectorType::get(BaseIVTy, State.VF);
    UnitStepVec =
        Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
    SplatStep = Builder.CreateVectorSplat(State.VF, Step);
    SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
  }

  unsigned StartPart = 0;
  unsigned EndPart = State.UF;
  unsigned StartLane = 0;
  unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
  if (State.Instance) {
    StartPart = State.Instance->Part;
    EndPart = StartPart + 1;
    StartLane = State.Instance->Lane.getKnownLane();
    EndLane = StartLane + 1;
  }
  for (unsigned Part = StartPart; Part < EndPart; ++Part) {
    Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);

    if (!FirstLaneOnly && State.VF.isScalable()) {
      auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
      auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
      if (BaseIVTy->isFloatingPointTy())
        InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
      auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
      auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
      State.set(this, Add, Part);
      // It's useful to record the lane values too for the known minimum
      // number of elements so we do those below. This improves the code
      // quality when trying to extract the first element, for example.
    }

    if (BaseIVTy->isFloatingPointTy())
      StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);

    for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
      Value *StartIdx = Builder.CreateBinOp(
          AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
      // The step returned by `createStepForVF` is a runtime-evaluated value
      // when VF is scalable. Otherwise, it should be folded into a Constant.
      assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
             "Expected StartIdx to be folded to a constant when VF is not "
             "scalable");
      auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
      auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
      State.set(this, Add, VPIteration(Part, Lane));
    }
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = SCALAR-STEPS ";
  printOperands(O, SlotTracker);
}
#endif

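// Illustrative example (not from the source): with VF = 4, a GEP with a
// loop-varying index, 'getelementptr inbounds float, ptr %base, i64 %iv',
// becomes one GEP whose index is the widened IV and whose result is a
// <4 x ptr>; if all operands are loop-invariant, a single scalar GEP is
// created and splatted instead.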
void VPWidenGEPRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
  // Construct a vector GEP by widening the operands of the scalar GEP as
  // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
  // results in a vector of pointers when at least one operand of the GEP
  // is vector-typed. Thus, to keep the representation compact, we only use
  // vector-typed operands for loop-varying values.

  if (areAllOperandsInvariant()) {
    // If we are vectorizing, but the GEP has only loop-invariant operands,
    // the GEP we build (by only using vector-typed operands for
    // loop-varying values) would be a scalar pointer. Thus, to ensure we
    // produce a vector of pointers, we need to either arbitrarily pick an
    // operand to broadcast, or broadcast a clone of the original GEP.
    // Here, we broadcast a clone of the original.
    //
    // TODO: If at some point we decide to scalarize instructions having
    //       loop-invariant operands, this special case will no longer be
    //       required. We would add the scalarization decision to
    //       collectLoopScalars() and teach getVectorValue() to broadcast
    //       the lane-zero scalar value.
    SmallVector<Value *> Ops;
    for (unsigned I = 0, E = getNumOperands(); I != E; I++)
      Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));

    auto *NewGEP =
        State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
                                ArrayRef(Ops).drop_front(), "", isInBounds());
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
      State.set(this, EntryPart, Part);
      State.addMetadata(EntryPart, GEP);
    }
  } else {
    // If the GEP has at least one loop-varying operand, we are sure to
    // produce a vector of pointers. But if we are only unrolling, we want
    // to produce a scalar GEP for each unroll part. Thus, the GEP we
    // produce with the code below will be scalar (if VF == 1) or vector
    // (otherwise). Note that for the unroll-only case, we still maintain
    // values in the vector mapping with initVector, as we do for other
    // instructions.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // The pointer operand of the new GEP. If it's loop-invariant, we
      // won't broadcast it.
      auto *Ptr = isPointerLoopInvariant()
                      ? State.get(getOperand(0), VPIteration(0, 0))
                      : State.get(getOperand(0), Part);

      // Collect all the indices for the new GEP. If any index is
      // loop-invariant, we won't broadcast it.
      SmallVector<Value *, 4> Indices;
      for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
        VPValue *Operand = getOperand(I);
        if (isIndexLoopInvariant(I - 1))
          Indices.push_back(State.get(Operand, VPIteration(0, 0)));
        else
          Indices.push_back(State.get(Operand, Part));
      }

      // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
      // but it should be a vector, otherwise.
      auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
                                             Indices, "", isInBounds());
      assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
             "NewGEP is not a pointer vector");
      State.set(this, NewGEP, Part);
      State.addMetadata(NewGEP, GEP);
    }
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-GEP ";
  O << (isPointerLoopInvariant() ? "Inv" : "Var");
  for (size_t I = 0; I < getNumOperands() - 1; ++I)
    O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";

  O << " ";
  printAsOperand(O, SlotTracker);
  O << " = getelementptr";
  printFlags(O);
  printOperands(O, SlotTracker);
}
#endif

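// A sketch of the reverse case below (illustrative, assuming fixed-width
// VF = 4, i.e. RunTimeVF = 4, and Part = 0): the recipe emits
//   %p = getelementptr %ty, ptr %ptr, 0     ; NumElt   = -Part * 4 = 0
//   %r = getelementptr %ty, ptr %p, -3      ; LastLane = 1 - 4     = -3
// so a wide memory access at %r covers the last VF elements, matching the
// reversed access order.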
"Inv" : "Var") << "]"; 1408 1409 O << " "; 1410 printAsOperand(O, SlotTracker); 1411 O << " = getelementptr"; 1412 printFlags(O); 1413 printOperands(O, SlotTracker); 1414 } 1415 #endif 1416 1417 void VPVectorPointerRecipe ::execute(VPTransformState &State) { 1418 auto &Builder = State.Builder; 1419 State.setDebugLocFrom(getDebugLoc()); 1420 for (unsigned Part = 0; Part < State.UF; ++Part) { 1421 // Calculate the pointer for the specific unroll-part. 1422 Value *PartPtr = nullptr; 1423 // Use i32 for the gep index type when the value is constant, 1424 // or query DataLayout for a more suitable index type otherwise. 1425 const DataLayout &DL = 1426 Builder.GetInsertBlock()->getModule()->getDataLayout(); 1427 Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0) 1428 ? DL.getIndexType(IndexedTy->getPointerTo()) 1429 : Builder.getInt32Ty(); 1430 Value *Ptr = State.get(getOperand(0), VPIteration(0, 0)); 1431 bool InBounds = isInBounds(); 1432 if (IsReverse) { 1433 // If the address is consecutive but reversed, then the 1434 // wide store needs to start at the last vector element. 1435 // RunTimeVF = VScale * VF.getKnownMinValue() 1436 // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue() 1437 Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF); 1438 // NumElt = -Part * RunTimeVF 1439 Value *NumElt = Builder.CreateMul( 1440 ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF); 1441 // LastLane = 1 - RunTimeVF 1442 Value *LastLane = 1443 Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF); 1444 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds); 1445 PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds); 1446 } else { 1447 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part); 1448 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds); 1449 } 1450 1451 State.set(this, PartPtr, Part, /*IsScalar*/ true); 1452 } 1453 } 1454 1455 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1456 void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent, 1457 VPSlotTracker &SlotTracker) const { 1458 O << Indent; 1459 printAsOperand(O, SlotTracker); 1460 O << " = vector-pointer "; 1461 if (IsReverse) 1462 O << "(reverse) "; 1463 1464 printOperands(O, SlotTracker); 1465 } 1466 #endif 1467 1468 void VPBlendRecipe::execute(VPTransformState &State) { 1469 State.setDebugLocFrom(getDebugLoc()); 1470 // We know that all PHIs in non-header blocks are converted into 1471 // selects, so we don't have to worry about the insertion order and we 1472 // can just use the builder. 1473 // At this point we generate the predication tree. There may be 1474 // duplications since this is a simple recursive scan, but future 1475 // optimizations will clean it up. 1476 1477 unsigned NumIncoming = getNumIncomingValues(); 1478 1479 // Generate a sequence of selects of the form: 1480 // SELECT(Mask3, In3, 1481 // SELECT(Mask2, In2, 1482 // SELECT(Mask1, In1, 1483 // In0))) 1484 // Note that Mask0 is never used: lanes for which no path reaches this phi and 1485 // are essentially undef are taken from In0. 1486 VectorParts Entry(State.UF); 1487 for (unsigned In = 0; In < NumIncoming; ++In) { 1488 for (unsigned Part = 0; Part < State.UF; ++Part) { 1489 // We might have single edge PHIs (blocks) - use an identity 1490 // 'select' for the first PHI operand. 1491 Value *In0 = State.get(getIncomingValue(In), Part); 1492 if (In == 0) 1493 Entry[Part] = In0; // Initialize with the first incoming value. 
1494 else { 1495 // Select between the current value and the previous incoming edge 1496 // based on the incoming mask. 1497 Value *Cond = State.get(getMask(In), Part); 1498 Entry[Part] = 1499 State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi"); 1500 } 1501 } 1502 } 1503 for (unsigned Part = 0; Part < State.UF; ++Part) 1504 State.set(this, Entry[Part], Part); 1505 } 1506 1507 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1508 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, 1509 VPSlotTracker &SlotTracker) const { 1510 O << Indent << "BLEND "; 1511 printAsOperand(O, SlotTracker); 1512 O << " ="; 1513 if (getNumIncomingValues() == 1) { 1514 // Not a User of any mask: not really blending, this is a 1515 // single-predecessor phi. 1516 O << " "; 1517 getIncomingValue(0)->printAsOperand(O, SlotTracker); 1518 } else { 1519 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) { 1520 O << " "; 1521 getIncomingValue(I)->printAsOperand(O, SlotTracker); 1522 if (I == 0) 1523 continue; 1524 O << "/"; 1525 getMask(I)->printAsOperand(O, SlotTracker); 1526 } 1527 } 1528 } 1529 #endif 1530 1531 void VPReductionRecipe::execute(VPTransformState &State) { 1532 assert(!State.Instance && "Reduction being replicated."); 1533 Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true); 1534 RecurKind Kind = RdxDesc.getRecurrenceKind(); 1535 // Propagate the fast-math flags carried by the underlying instruction. 1536 IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); 1537 State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); 1538 for (unsigned Part = 0; Part < State.UF; ++Part) { 1539 Value *NewVecOp = State.get(getVecOp(), Part); 1540 if (VPValue *Cond = getCondOp()) { 1541 Value *NewCond = State.get(Cond, Part, State.VF.isScalar()); 1542 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType()); 1543 Type *ElementTy = VecTy ? 
  VectorParts Entry(State.UF);
  for (unsigned In = 0; In < NumIncoming; ++In) {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // We might have single edge PHIs (blocks) - use an identity
      // 'select' for the first PHI operand.
      Value *In0 = State.get(getIncomingValue(In), Part);
      if (In == 0)
        Entry[Part] = In0; // Initialize with the first incoming value.
      else {
        // Select between the current value and the previous incoming edge
        // based on the incoming mask.
        Value *Cond = State.get(getMask(In), Part);
        Entry[Part] =
            State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
      }
    }
  }
  for (unsigned Part = 0; Part < State.UF; ++Part)
    State.set(this, Entry[Part], Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "BLEND ";
  printAsOperand(O, SlotTracker);
  O << " =";
  if (getNumIncomingValues() == 1) {
    // Not a User of any mask: not really blending, this is a
    // single-predecessor phi.
    O << " ";
    getIncomingValue(0)->printAsOperand(O, SlotTracker);
  } else {
    for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
      O << " ";
      getIncomingValue(I)->printAsOperand(O, SlotTracker);
      if (I == 0)
        continue;
      O << "/";
      getMask(I)->printAsOperand(O, SlotTracker);
    }
  }
}
#endif

void VPReductionRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Reduction being replicated.");
  Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
  RecurKind Kind = RdxDesc.getRecurrenceKind();
  // Propagate the fast-math flags carried by the underlying instruction.
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *NewVecOp = State.get(getVecOp(), Part);
    if (VPValue *Cond = getCondOp()) {
      Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
      VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
      Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
      Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
                                                  RdxDesc.getFastMathFlags());
      if (State.VF.isVector()) {
        Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
      }

      Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
      NewVecOp = Select;
    }
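    // E.g. (illustrative), for a masked integer add reduction the select is
    //   %sel = select <4 x i1> %cond, <4 x i32> %vec, <4 x i32> zeroinitializer
    // so that masked-off lanes contribute the identity (0) to the reduction.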
"CLONE " : "REPLICATE "); 1619 1620 if (!getUnderlyingInstr()->getType()->isVoidTy()) { 1621 printAsOperand(O, SlotTracker); 1622 O << " = "; 1623 } 1624 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) { 1625 O << "call"; 1626 printFlags(O); 1627 O << "@" << CB->getCalledFunction()->getName() << "("; 1628 interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)), 1629 O, [&O, &SlotTracker](VPValue *Op) { 1630 Op->printAsOperand(O, SlotTracker); 1631 }); 1632 O << ")"; 1633 } else { 1634 O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()); 1635 printFlags(O); 1636 printOperands(O, SlotTracker); 1637 } 1638 1639 if (shouldPack()) 1640 O << " (S->V)"; 1641 } 1642 #endif 1643 1644 /// Checks if \p C is uniform across all VFs and UFs. It is considered as such 1645 /// if it is either defined outside the vector region or its operand is known to 1646 /// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI). 1647 /// TODO: Uniformity should be associated with a VPValue and there should be a 1648 /// generic way to check. 1649 static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) { 1650 return C->isDefinedOutsideVectorRegions() || 1651 isa<VPDerivedIVRecipe>(C->getOperand(0)) || 1652 isa<VPCanonicalIVPHIRecipe>(C->getOperand(0)); 1653 } 1654 1655 Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) { 1656 assert(vputils::onlyFirstLaneUsed(this) && 1657 "Codegen only implemented for first lane."); 1658 switch (Opcode) { 1659 case Instruction::SExt: 1660 case Instruction::ZExt: 1661 case Instruction::Trunc: { 1662 // Note: SExt/ZExt not used yet. 1663 Value *Op = State.get(getOperand(0), VPIteration(Part, 0)); 1664 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy); 1665 } 1666 default: 1667 llvm_unreachable("opcode not implemented yet"); 1668 } 1669 } 1670 1671 void VPScalarCastRecipe ::execute(VPTransformState &State) { 1672 bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this); 1673 for (unsigned Part = 0; Part != State.UF; ++Part) { 1674 Value *Res; 1675 // Only generate a single instance, if the recipe is uniform across UFs and 1676 // VFs. 1677 if (Part > 0 && IsUniformAcrossVFsAndUFs) 1678 Res = State.get(this, VPIteration(0, 0)); 1679 else 1680 Res = generate(State, Part); 1681 State.set(this, Res, VPIteration(Part, 0)); 1682 } 1683 } 1684 1685 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1686 void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent, 1687 VPSlotTracker &SlotTracker) const { 1688 O << Indent << "SCALAR-CAST "; 1689 printAsOperand(O, SlotTracker); 1690 O << " = " << Instruction::getOpcodeName(Opcode) << " "; 1691 printOperands(O, SlotTracker); 1692 O << " to " << *ResultTy; 1693 } 1694 #endif 1695 1696 void VPBranchOnMaskRecipe::execute(VPTransformState &State) { 1697 assert(State.Instance && "Branch on Mask works only on single instance."); 1698 1699 unsigned Part = State.Instance->Part; 1700 unsigned Lane = State.Instance->Lane.getKnownLane(); 1701 1702 Value *ConditionBit = nullptr; 1703 VPValue *BlockInMask = getMask(); 1704 if (BlockInMask) { 1705 ConditionBit = State.get(BlockInMask, Part); 1706 if (ConditionBit->getType()->isVectorTy()) 1707 ConditionBit = State.Builder.CreateExtractElement( 1708 ConditionBit, State.Builder.getInt32(Lane)); 1709 } else // Block in mask is all-one. 

  // Replace the temporary unreachable terminator with a new conditional
  // branch, whose two destinations will be set later when they are created.
  auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
  assert(isa<UnreachableInst>(CurrentTerminator) &&
         "Expected to replace unreachable terminator with conditional branch.");
  auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
  CondBr->setSuccessor(0, nullptr);
  ReplaceInstWithInst(CurrentTerminator, CondBr);
}

void VPPredInstPHIRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Predicated instruction PHI works per instance.");
  Instruction *ScalarPredInst =
      cast<Instruction>(State.get(getOperand(0), *State.Instance));
  BasicBlock *PredicatedBB = ScalarPredInst->getParent();
  BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
  assert(PredicatingBB && "Predicated block has no single predecessor.");
  assert(isa<VPReplicateRecipe>(getOperand(0)) &&
         "operand must be VPReplicateRecipe");

  // By current pack/unpack logic we need to generate only a single phi node:
  // if a vector value for the predicated instruction exists at this point it
  // means the instruction has vector users only, and a phi for the vector
  // value is needed. In this case the recipe of the predicated instruction is
  // marked to also do that packing, thereby "hoisting" the insert-element
  // sequence. Otherwise, a phi node for the scalar value is needed.
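  // Illustrative IR for the vector case (hypothetical names):
  //   %vec.phi = phi <4 x i32> [ %unmodified.vec, %predicating.bb ],
  //                            [ %vec.with.lane.inserted, %predicated.bb ]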
  unsigned Part = State.Instance->Part;
  if (State.hasVectorValue(getOperand(0), Part)) {
    Value *VectorValue = State.get(getOperand(0), Part);
    InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
    PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
    VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
    VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
    if (State.hasVectorValue(this, Part))
      State.reset(this, VPhi, Part);
    else
      State.set(this, VPhi, Part);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), VPhi, Part);
  } else {
    Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
    PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
    Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
                     PredicatingBB);
    Phi->addIncoming(ScalarPredInst, PredicatedBB);
    if (State.hasScalarValue(this, *State.Instance))
      State.reset(this, Phi, *State.Instance);
    else
      State.set(this, Phi, *State.Instance);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), Phi, *State.Instance);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  printOperands(O, SlotTracker);
}

void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = load ";
  printOperands(O, SlotTracker);
}

void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = vp.load ";
  printOperands(O, SlotTracker);
}

void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN store ";
  printOperands(O, SlotTracker);
}

void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN vp.store ";
  printOperands(O, SlotTracker);
}
#endif

void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
  Value *Start = getStartValue()->getLiveInIRValue();
  PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
  EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  EntryPart->addIncoming(Start, VectorPH);
  EntryPart->setDebugLoc(getDebugLoc());
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, EntryPart, Part, /*IsScalar*/ true);
}
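
// Note (illustrative): the phi created above typically ends up as
//   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
// where the backedge incoming value is added later, once the loop latch has
// been generated.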

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
#endif

bool VPCanonicalIVPHIRecipe::isCanonical(
    InductionDescriptor::InductionKind Kind, VPValue *Start,
    VPValue *Step) const {
  // Must be an integer induction.
  if (Kind != InductionDescriptor::IK_IntInduction)
    return false;
  // Start must match the start value of this canonical induction.
  if (Start != getStartValue())
    return false;

  // If the step is defined by a recipe, it is not a ConstantInt.
  if (Step->getDefiningRecipe())
    return false;

  ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
  return StepC && StepC->isOne();
}

bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
  return IsScalarAfterVectorization &&
         (!IsScalable || vputils::onlyFirstLaneUsed(this));
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-POINTER-INDUCTION ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << ", " << *IndDesc.getStep();
}
#endif

void VPExpandSCEVRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "cannot be used in per-lane");
  const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
  SCEVExpander Exp(SE, DL, "induction");

  Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
                                 &*State.Builder.GetInsertPoint());
  assert(!State.ExpandedSCEVs.contains(Expr) &&
         "Same SCEV expanded multiple times");
  State.ExpandedSCEVs[Expr] = Res;
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, Res, {Part, 0});
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  getVPSingleValue()->printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV " << *Expr;
}
#endif

void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
  Type *STy = CanonicalIV->getType();
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
  ElementCount VF = State.VF;
  Value *VStart = VF.isScalar()
                      ? CanonicalIV
                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    Value *VStep = createStepForVF(Builder, STy, VF, Part);
    if (VF.isVector()) {
      VStep = Builder.CreateVectorSplat(VF, VStep);
      VStep =
          Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
    }
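    // Illustrative: for a fixed VF of 4 and Part 1, VStep becomes
    // splat(4) + <0, 1, 2, 3> = <4, 5, 6, 7>, so the widened IV below is
    // splat(%index) + <4, 5, 6, 7>.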
    Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
    State.set(this, CanonicalVectorIV, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
#endif

void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  if (State.VF.isVector()) {
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(VectorPH->getTerminator());
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }
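
  // Illustrative: for a fixed VF of 4, VectorInit is now
  // <poison, poison, poison, %init>; the scalar start value sits in the last
  // lane, where the first vector iteration of the recurrence reads it.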

  // Create a phi node for the new recurrence.
  PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
  EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(VectorInit, VectorPH);
  State.set(this, EntryPart, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPReductionPHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;

  // Reductions do not have to start at zero. They can start with
  // any loop invariant values.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
  // this value when we vectorize all of the instructions that use the PHI.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy = ScalarPHI ? StartV->getType()
                          : VectorType::get(StartV->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
    EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part, IsInLoop);
  }

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
    // MinMax and AnyOf reductions have the start value as their identity.
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }
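
  // Illustrative: for an integer add reduction with a fixed VF of 4, part 0
  // starts at <%start, 0, 0, 0> while any remaining parts start at the
  // identity splat <0, 0, 0, 0>, as added below.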
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part, IsInLoop);
    // Make sure to add the reduction start value only to the
    // first unroll part.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-REDUCTION-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPWidenPHIRecipe::execute(VPTransformState &State) {
  assert(EnableVPlanNativePath &&
         "Non-native vplans are not expected to have VPWidenPHIRecipes.");

  Value *Op0 = State.get(getOperand(0), 0);
  Type *VecTy = Op0->getType();
  Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
  State.set(this, VecPhi, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-PHI ";

  auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
  // Unless all incoming values are modeled in VPlan, print the original PHI
  // directly.
  // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant
  // incoming values as VPValues.
  if (getNumOperands() != OriginalPhi->getNumOperands()) {
    O << VPlanIngredient(OriginalPhi);
    return;
  }

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
// remove VPActiveLaneMaskPHIRecipe.
void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    Value *StartMask = State.get(getOperand(0), Part);
    PHINode *EntryPart =
        State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
    EntryPart->addIncoming(StartMask, VectorPH);
    EntryPart->setDebugLoc(getDebugLoc());
    State.set(this, EntryPart, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                      VPSlotTracker &SlotTracker) const {
  O << Indent << "ACTIVE-LANE-MASK-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) {
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization.");
  Value *Start = State.get(getOperand(0), VPIteration(0, 0));
  PHINode *EntryPart =
      State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
  EntryPart->addIncoming(Start, VectorPH);
  EntryPart->setDebugLoc(getDebugLoc());
  State.set(this, EntryPart, 0, /*IsScalar=*/true);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif