1 //===-- VPlanTransforms.cpp - Utility VPlan to VPlan transforms -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements a set of utility VPlan to VPlan transformations. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "VPlanTransforms.h" 15 #include "VPlanCFG.h" 16 #include "llvm/ADT/PostOrderIterator.h" 17 #include "llvm/ADT/SetVector.h" 18 #include "llvm/Analysis/IVDescriptors.h" 19 #include "llvm/Analysis/VectorUtils.h" 20 #include "llvm/IR/Intrinsics.h" 21 22 using namespace llvm; 23 24 void VPlanTransforms::VPInstructionsToVPRecipes( 25 Loop *OrigLoop, VPlanPtr &Plan, 26 function_ref<const InductionDescriptor *(PHINode *)> 27 GetIntOrFpInductionDescriptor, 28 SmallPtrSetImpl<Instruction *> &DeadInstructions, ScalarEvolution &SE, 29 const TargetLibraryInfo &TLI) { 30 31 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT( 32 Plan->getEntry()); 33 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) { 34 VPRecipeBase *Term = VPBB->getTerminator(); 35 auto EndIter = Term ? Term->getIterator() : VPBB->end(); 36 // Introduce each ingredient into VPlan. 37 for (VPRecipeBase &Ingredient : 38 make_early_inc_range(make_range(VPBB->begin(), EndIter))) { 39 40 VPValue *VPV = Ingredient.getVPSingleValue(); 41 Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue()); 42 if (DeadInstructions.count(Inst)) { 43 VPValue DummyValue; 44 VPV->replaceAllUsesWith(&DummyValue); 45 Ingredient.eraseFromParent(); 46 continue; 47 } 48 49 VPRecipeBase *NewRecipe = nullptr; 50 if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) { 51 auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue()); 52 if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) { 53 VPValue *Start = Plan->getOrAddVPValue(II->getStartValue()); 54 VPValue *Step = 55 vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE); 56 NewRecipe = 57 new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II, true); 58 } else { 59 Plan->addVPValue(Phi, VPPhi); 60 continue; 61 } 62 } else { 63 assert(isa<VPInstruction>(&Ingredient) && 64 "only VPInstructions expected here"); 65 assert(!isa<PHINode>(Inst) && "phis should be handled above"); 66 // Create VPWidenMemoryInstructionRecipe for loads and stores. 67 if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 68 NewRecipe = new VPWidenMemoryInstructionRecipe( 69 *Load, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)), 70 nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/); 71 } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 72 NewRecipe = new VPWidenMemoryInstructionRecipe( 73 *Store, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)), 74 Plan->getOrAddVPValue(Store->getValueOperand()), nullptr /*Mask*/, 75 false /*Consecutive*/, false /*Reverse*/); 76 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { 77 NewRecipe = new VPWidenGEPRecipe( 78 GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop); 79 } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) { 80 NewRecipe = 81 new VPWidenCallRecipe(*CI, Plan->mapToVPValues(CI->args()), 82 getVectorIntrinsicIDForCall(CI, &TLI)); 83 } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) { 84 bool InvariantCond = 85 SE.isLoopInvariant(SE.getSCEV(SI->getOperand(0)), OrigLoop); 86 NewRecipe = new VPWidenSelectRecipe( 87 *SI, Plan->mapToVPValues(SI->operands()), InvariantCond); 88 } else { 89 NewRecipe = 90 new VPWidenRecipe(*Inst, Plan->mapToVPValues(Inst->operands())); 91 } 92 } 93 94 NewRecipe->insertBefore(&Ingredient); 95 if (NewRecipe->getNumDefinedValues() == 1) 96 VPV->replaceAllUsesWith(NewRecipe->getVPSingleValue()); 97 else 98 assert(NewRecipe->getNumDefinedValues() == 0 && 99 "Only recpies with zero or one defined values expected"); 100 Ingredient.eraseFromParent(); 101 Plan->removeVPValueFor(Inst); 102 for (auto *Def : NewRecipe->definedValues()) { 103 Plan->addVPValue(Inst, Def); 104 } 105 } 106 } 107 } 108 109 bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) { 110 auto Iter = vp_depth_first_deep(Plan.getEntry()); 111 bool Changed = false; 112 // First, collect the operands of all recipes in replicate blocks as seeds for 113 // sinking. 114 SetVector<std::pair<VPBasicBlock *, VPRecipeBase *>> WorkList; 115 for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) { 116 VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock(); 117 if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2) 118 continue; 119 VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]); 120 if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock()) 121 continue; 122 for (auto &Recipe : *VPBB) { 123 for (VPValue *Op : Recipe.operands()) 124 if (auto *Def = Op->getDefiningRecipe()) 125 WorkList.insert(std::make_pair(VPBB, Def)); 126 } 127 } 128 129 bool ScalarVFOnly = Plan.hasScalarVFOnly(); 130 // Try to sink each replicate or scalar IV steps recipe in the worklist. 131 for (unsigned I = 0; I != WorkList.size(); ++I) { 132 VPBasicBlock *SinkTo; 133 VPRecipeBase *SinkCandidate; 134 std::tie(SinkTo, SinkCandidate) = WorkList[I]; 135 if (SinkCandidate->getParent() == SinkTo || 136 SinkCandidate->mayHaveSideEffects() || 137 SinkCandidate->mayReadOrWriteMemory()) 138 continue; 139 if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) { 140 if (!ScalarVFOnly && RepR->isUniform()) 141 continue; 142 } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate)) 143 continue; 144 145 bool NeedsDuplicating = false; 146 // All recipe users of the sink candidate must be in the same block SinkTo 147 // or all users outside of SinkTo must be uniform-after-vectorization ( 148 // i.e., only first lane is used) . In the latter case, we need to duplicate 149 // SinkCandidate. 150 auto CanSinkWithUser = [SinkTo, &NeedsDuplicating, 151 SinkCandidate](VPUser *U) { 152 auto *UI = dyn_cast<VPRecipeBase>(U); 153 if (!UI) 154 return false; 155 if (UI->getParent() == SinkTo) 156 return true; 157 NeedsDuplicating = 158 UI->onlyFirstLaneUsed(SinkCandidate->getVPSingleValue()); 159 // We only know how to duplicate VPRecipeRecipes for now. 160 return NeedsDuplicating && isa<VPReplicateRecipe>(SinkCandidate); 161 }; 162 if (!all_of(SinkCandidate->getVPSingleValue()->users(), CanSinkWithUser)) 163 continue; 164 165 if (NeedsDuplicating) { 166 if (ScalarVFOnly) 167 continue; 168 Instruction *I = cast<Instruction>( 169 cast<VPReplicateRecipe>(SinkCandidate)->getUnderlyingValue()); 170 auto *Clone = 171 new VPReplicateRecipe(I, SinkCandidate->operands(), true, false); 172 // TODO: add ".cloned" suffix to name of Clone's VPValue. 173 174 Clone->insertBefore(SinkCandidate); 175 for (auto *U : to_vector(SinkCandidate->getVPSingleValue()->users())) { 176 auto *UI = cast<VPRecipeBase>(U); 177 if (UI->getParent() == SinkTo) 178 continue; 179 180 for (unsigned Idx = 0; Idx != UI->getNumOperands(); Idx++) { 181 if (UI->getOperand(Idx) != SinkCandidate->getVPSingleValue()) 182 continue; 183 UI->setOperand(Idx, Clone); 184 } 185 } 186 } 187 SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi()); 188 for (VPValue *Op : SinkCandidate->operands()) 189 if (auto *Def = Op->getDefiningRecipe()) 190 WorkList.insert(std::make_pair(SinkTo, Def)); 191 Changed = true; 192 } 193 return Changed; 194 } 195 196 /// If \p R is a region with a VPBranchOnMaskRecipe in the entry block, return 197 /// the mask. 198 VPValue *getPredicatedMask(VPRegionBlock *R) { 199 auto *EntryBB = dyn_cast<VPBasicBlock>(R->getEntry()); 200 if (!EntryBB || EntryBB->size() != 1 || 201 !isa<VPBranchOnMaskRecipe>(EntryBB->begin())) 202 return nullptr; 203 204 return cast<VPBranchOnMaskRecipe>(&*EntryBB->begin())->getOperand(0); 205 } 206 207 /// If \p R is a triangle region, return the 'then' block of the triangle. 208 static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) { 209 auto *EntryBB = cast<VPBasicBlock>(R->getEntry()); 210 if (EntryBB->getNumSuccessors() != 2) 211 return nullptr; 212 213 auto *Succ0 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[0]); 214 auto *Succ1 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[1]); 215 if (!Succ0 || !Succ1) 216 return nullptr; 217 218 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1) 219 return nullptr; 220 if (Succ0->getSingleSuccessor() == Succ1) 221 return Succ0; 222 if (Succ1->getSingleSuccessor() == Succ0) 223 return Succ1; 224 return nullptr; 225 } 226 227 bool VPlanTransforms::mergeReplicateRegionsIntoSuccessors(VPlan &Plan) { 228 SetVector<VPRegionBlock *> DeletedRegions; 229 230 // Collect replicate regions followed by an empty block, followed by another 231 // replicate region with matching masks to process front. This is to avoid 232 // iterator invalidation issues while merging regions. 233 SmallVector<VPRegionBlock *, 8> WorkList; 234 for (VPRegionBlock *Region1 : VPBlockUtils::blocksOnly<VPRegionBlock>( 235 vp_depth_first_deep(Plan.getEntry()))) { 236 if (!Region1->isReplicator()) 237 continue; 238 auto *MiddleBasicBlock = 239 dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor()); 240 if (!MiddleBasicBlock || !MiddleBasicBlock->empty()) 241 continue; 242 243 auto *Region2 = 244 dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor()); 245 if (!Region2 || !Region2->isReplicator()) 246 continue; 247 248 VPValue *Mask1 = getPredicatedMask(Region1); 249 VPValue *Mask2 = getPredicatedMask(Region2); 250 if (!Mask1 || Mask1 != Mask2) 251 continue; 252 253 assert(Mask1 && Mask2 && "both region must have conditions"); 254 WorkList.push_back(Region1); 255 } 256 257 // Move recipes from Region1 to its successor region, if both are triangles. 258 for (VPRegionBlock *Region1 : WorkList) { 259 if (DeletedRegions.contains(Region1)) 260 continue; 261 auto *MiddleBasicBlock = cast<VPBasicBlock>(Region1->getSingleSuccessor()); 262 auto *Region2 = cast<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor()); 263 264 VPBasicBlock *Then1 = getPredicatedThenBlock(Region1); 265 VPBasicBlock *Then2 = getPredicatedThenBlock(Region2); 266 if (!Then1 || !Then2) 267 continue; 268 269 // Note: No fusion-preventing memory dependencies are expected in either 270 // region. Such dependencies should be rejected during earlier dependence 271 // checks, which guarantee accesses can be re-ordered for vectorization. 272 // 273 // Move recipes to the successor region. 274 for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1))) 275 ToMove.moveBefore(*Then2, Then2->getFirstNonPhi()); 276 277 auto *Merge1 = cast<VPBasicBlock>(Then1->getSingleSuccessor()); 278 auto *Merge2 = cast<VPBasicBlock>(Then2->getSingleSuccessor()); 279 280 // Move VPPredInstPHIRecipes from the merge block to the successor region's 281 // merge block. Update all users inside the successor region to use the 282 // original values. 283 for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) { 284 VPValue *PredInst1 = 285 cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0); 286 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue(); 287 for (VPUser *U : to_vector(Phi1ToMoveV->users())) { 288 auto *UI = dyn_cast<VPRecipeBase>(U); 289 if (!UI || UI->getParent() != Then2) 290 continue; 291 for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) { 292 if (Phi1ToMoveV != U->getOperand(I)) 293 continue; 294 U->setOperand(I, PredInst1); 295 } 296 } 297 298 Phi1ToMove.moveBefore(*Merge2, Merge2->begin()); 299 } 300 301 // Finally, remove the first region. 302 for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) { 303 VPBlockUtils::disconnectBlocks(Pred, Region1); 304 VPBlockUtils::connectBlocks(Pred, MiddleBasicBlock); 305 } 306 VPBlockUtils::disconnectBlocks(Region1, MiddleBasicBlock); 307 DeletedRegions.insert(Region1); 308 } 309 310 for (VPRegionBlock *ToDelete : DeletedRegions) 311 delete ToDelete; 312 return !DeletedRegions.empty(); 313 } 314 315 bool VPlanTransforms::mergeBlocksIntoPredecessors(VPlan &Plan) { 316 SmallVector<VPBasicBlock *> WorkList; 317 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( 318 vp_depth_first_deep(Plan.getEntry()))) { 319 auto *PredVPBB = 320 dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor()); 321 if (PredVPBB && PredVPBB->getNumSuccessors() == 1) 322 WorkList.push_back(VPBB); 323 } 324 325 for (VPBasicBlock *VPBB : WorkList) { 326 VPBasicBlock *PredVPBB = cast<VPBasicBlock>(VPBB->getSinglePredecessor()); 327 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) 328 R.moveBefore(*PredVPBB, PredVPBB->end()); 329 VPBlockUtils::disconnectBlocks(PredVPBB, VPBB); 330 auto *ParentRegion = cast_or_null<VPRegionBlock>(VPBB->getParent()); 331 if (ParentRegion && ParentRegion->getExiting() == VPBB) 332 ParentRegion->setExiting(PredVPBB); 333 for (auto *Succ : to_vector(VPBB->successors())) { 334 VPBlockUtils::disconnectBlocks(VPBB, Succ); 335 VPBlockUtils::connectBlocks(PredVPBB, Succ); 336 } 337 delete VPBB; 338 } 339 return !WorkList.empty(); 340 } 341 342 void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) { 343 for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { 344 auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); 345 if (!IV || IV->getTruncInst()) 346 continue; 347 348 // A sequence of IR Casts has potentially been recorded for IV, which 349 // *must be bypassed* when the IV is vectorized, because the vectorized IV 350 // will produce the desired casted value. This sequence forms a def-use 351 // chain and is provided in reverse order, ending with the cast that uses 352 // the IV phi. Search for the recipe of the last cast in the chain and 353 // replace it with the original IV. Note that only the final cast is 354 // expected to have users outside the cast-chain and the dead casts left 355 // over will be cleaned up later. 356 auto &Casts = IV->getInductionDescriptor().getCastInsts(); 357 VPValue *FindMyCast = IV; 358 for (Instruction *IRCast : reverse(Casts)) { 359 VPRecipeBase *FoundUserCast = nullptr; 360 for (auto *U : FindMyCast->users()) { 361 auto *UserCast = cast<VPRecipeBase>(U); 362 if (UserCast->getNumDefinedValues() == 1 && 363 UserCast->getVPSingleValue()->getUnderlyingValue() == IRCast) { 364 FoundUserCast = UserCast; 365 break; 366 } 367 } 368 FindMyCast = FoundUserCast->getVPSingleValue(); 369 } 370 FindMyCast->replaceAllUsesWith(IV); 371 } 372 } 373 374 void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) { 375 VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); 376 VPWidenCanonicalIVRecipe *WidenNewIV = nullptr; 377 for (VPUser *U : CanonicalIV->users()) { 378 WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U); 379 if (WidenNewIV) 380 break; 381 } 382 383 if (!WidenNewIV) 384 return; 385 386 VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); 387 for (VPRecipeBase &Phi : HeaderVPBB->phis()) { 388 auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); 389 390 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical() || 391 WidenOriginalIV->getScalarType() != WidenNewIV->getScalarType()) 392 continue; 393 394 // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides 395 // everything WidenNewIV's users need. That is, WidenOriginalIV will 396 // generate a vector phi or all users of WidenNewIV demand the first lane 397 // only. 398 if (WidenOriginalIV->needsVectorIV() || 399 vputils::onlyFirstLaneUsed(WidenNewIV)) { 400 WidenNewIV->replaceAllUsesWith(WidenOriginalIV); 401 WidenNewIV->eraseFromParent(); 402 return; 403 } 404 } 405 } 406 407 void VPlanTransforms::removeDeadRecipes(VPlan &Plan) { 408 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT( 409 Plan.getEntry()); 410 411 for (VPBasicBlock *VPBB : reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) { 412 // The recipes in the block are processed in reverse order, to catch chains 413 // of dead recipes. 414 for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) { 415 if (R.mayHaveSideEffects() || any_of(R.definedValues(), [](VPValue *V) { 416 return V->getNumUsers() > 0; 417 })) 418 continue; 419 R.eraseFromParent(); 420 } 421 } 422 } 423 424 void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { 425 SmallVector<VPRecipeBase *> ToRemove; 426 VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); 427 bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1)); 428 for (VPRecipeBase &Phi : HeaderVPBB->phis()) { 429 auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); 430 if (!WideIV) 431 continue; 432 if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) { 433 return U->usesScalars(WideIV); 434 })) 435 continue; 436 437 auto IP = HeaderVPBB->getFirstNonPhi(); 438 VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); 439 Type *ResultTy = WideIV->getPHINode()->getType(); 440 if (Instruction *TruncI = WideIV->getTruncInst()) 441 ResultTy = TruncI->getType(); 442 const InductionDescriptor &ID = WideIV->getInductionDescriptor(); 443 VPValue *Step = 444 vputils::getOrCreateVPValueForSCEVExpr(Plan, ID.getStep(), SE); 445 VPValue *BaseIV = CanonicalIV; 446 if (!CanonicalIV->isCanonical(ID, ResultTy)) { 447 BaseIV = new VPDerivedIVRecipe(ID, WideIV->getStartValue(), CanonicalIV, 448 Step, ResultTy); 449 HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP); 450 } 451 452 VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step); 453 HeaderVPBB->insert(Steps, IP); 454 455 // Update scalar users of IV to use Step instead. Use SetVector to ensure 456 // the list of users doesn't contain duplicates. 457 SetVector<VPUser *> Users(WideIV->user_begin(), WideIV->user_end()); 458 for (VPUser *U : Users) { 459 if (HasOnlyVectorVFs && !U->usesScalars(WideIV)) 460 continue; 461 for (unsigned I = 0, E = U->getNumOperands(); I != E; I++) { 462 if (U->getOperand(I) != WideIV) 463 continue; 464 U->setOperand(I, Steps); 465 } 466 } 467 } 468 } 469 470 void VPlanTransforms::removeRedundantExpandSCEVRecipes(VPlan &Plan) { 471 DenseMap<const SCEV *, VPValue *> SCEV2VPV; 472 473 for (VPRecipeBase &R : 474 make_early_inc_range(*Plan.getEntry()->getEntryBasicBlock())) { 475 auto *ExpR = dyn_cast<VPExpandSCEVRecipe>(&R); 476 if (!ExpR) 477 continue; 478 479 auto I = SCEV2VPV.insert({ExpR->getSCEV(), ExpR}); 480 if (I.second) 481 continue; 482 ExpR->replaceAllUsesWith(I.first->second); 483 ExpR->eraseFromParent(); 484 } 485 } 486 487 static bool canSimplifyBranchOnCond(VPInstruction *Term) { 488 VPInstruction *Not = dyn_cast<VPInstruction>(Term->getOperand(0)); 489 if (!Not || Not->getOpcode() != VPInstruction::Not) 490 return false; 491 492 VPInstruction *ALM = dyn_cast<VPInstruction>(Not->getOperand(0)); 493 return ALM && ALM->getOpcode() == VPInstruction::ActiveLaneMask; 494 } 495 496 void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, 497 unsigned BestUF, 498 PredicatedScalarEvolution &PSE) { 499 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan"); 500 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan"); 501 VPBasicBlock *ExitingVPBB = 502 Plan.getVectorLoopRegion()->getExitingBasicBlock(); 503 auto *Term = dyn_cast<VPInstruction>(&ExitingVPBB->back()); 504 // Try to simplify the branch condition if TC <= VF * UF when preparing to 505 // execute the plan for the main vector loop. We only do this if the 506 // terminator is: 507 // 1. BranchOnCount, or 508 // 2. BranchOnCond where the input is Not(ActiveLaneMask). 509 if (!Term || (Term->getOpcode() != VPInstruction::BranchOnCount && 510 (Term->getOpcode() != VPInstruction::BranchOnCond || 511 !canSimplifyBranchOnCond(Term)))) 512 return; 513 514 Type *IdxTy = 515 Plan.getCanonicalIV()->getStartValue()->getLiveInIRValue()->getType(); 516 const SCEV *TripCount = createTripCountSCEV(IdxTy, PSE); 517 ScalarEvolution &SE = *PSE.getSE(); 518 const SCEV *C = 519 SE.getConstant(TripCount->getType(), BestVF.getKnownMinValue() * BestUF); 520 if (TripCount->isZero() || 521 !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C)) 522 return; 523 524 LLVMContext &Ctx = SE.getContext(); 525 auto *BOC = 526 new VPInstruction(VPInstruction::BranchOnCond, 527 {Plan.getOrAddExternalDef(ConstantInt::getTrue(Ctx))}); 528 Term->eraseFromParent(); 529 ExitingVPBB->appendRecipe(BOC); 530 Plan.setVF(BestVF); 531 Plan.setUF(BestUF); 532 // TODO: Further simplifications are possible 533 // 1. Replace inductions with constants. 534 // 2. Replace vector loop region with VPBasicBlock. 535 } 536