//===-- VPlanTransforms.cpp - Utility VPlan to VPlan transforms ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements a set of utility VPlan to VPlan transformations.
///
//===----------------------------------------------------------------------===//

#include "VPlanTransforms.h"
#include "VPRecipeBuilder.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;

void VPlanTransforms::VPInstructionsToVPRecipes(
    VPlanPtr &Plan,
    function_ref<const InductionDescriptor *(PHINode *)>
        GetIntOrFpInductionDescriptor,
    ScalarEvolution &SE, const TargetLibraryInfo &TLI) {

  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      Plan->getVectorLoopRegion());
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
    // Skip blocks outside region
    if (!VPBB->getParent())
      break;
    VPRecipeBase *Term = VPBB->getTerminator();
    auto EndIter = Term ? Term->getIterator() : VPBB->end();
    // Introduce each ingredient into VPlan.
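    // Each ingredient below is still modeled as a plain VPInstruction (or a
    // VPWidenPHIRecipe for phis) wrapping the underlying IR instruction. It is
    // replaced by a dedicated widened recipe and erased once its uses have
    // been rewired; widen-phi recipes without an int/fp induction descriptor
    // are left unchanged.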
    for (VPRecipeBase &Ingredient :
         make_early_inc_range(make_range(VPBB->begin(), EndIter))) {

      VPValue *VPV = Ingredient.getVPSingleValue();
      Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());

      VPRecipeBase *NewRecipe = nullptr;
      if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) {
        auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
        const auto *II = GetIntOrFpInductionDescriptor(Phi);
        if (!II)
          continue;

        VPValue *Start = Plan->getOrAddLiveIn(II->getStartValue());
        VPValue *Step =
            vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE);
        NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II);
      } else {
        assert(isa<VPInstruction>(&Ingredient) &&
               "only VPInstructions expected here");
        assert(!isa<PHINode>(Inst) && "phis should be handled above");
        // Create VPWidenMemoryRecipe for loads and stores.
        if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
          NewRecipe = new VPWidenLoadRecipe(
              *Load, Ingredient.getOperand(0), nullptr /*Mask*/,
              false /*Consecutive*/, false /*Reverse*/,
              Ingredient.getDebugLoc());
        } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
          NewRecipe = new VPWidenStoreRecipe(
              *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
              nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
              Ingredient.getDebugLoc());
        } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
          NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
        } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
          NewRecipe = new VPWidenCallRecipe(
              CI, Ingredient.operands(), getVectorIntrinsicIDForCall(CI, &TLI),
              CI->getDebugLoc());
        } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
          NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands());
        } else if (auto *CI = dyn_cast<CastInst>(Inst)) {
          NewRecipe = new VPWidenCastRecipe(
              CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), *CI);
        } else {
          NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands());
        }
      }

      NewRecipe->insertBefore(&Ingredient);
      if (NewRecipe->getNumDefinedValues() == 1)
        VPV->replaceAllUsesWith(NewRecipe->getVPSingleValue());
      else
        assert(NewRecipe->getNumDefinedValues() == 0 &&
"Only recpies with zero or one defined values expected"); 99349cc55cSDimitry Andric Ingredient.eraseFromParent(); 100480093f4SDimitry Andric } 101480093f4SDimitry Andric } 102fe6060f1SDimitry Andric } 103fe6060f1SDimitry Andric 10406c3fb27SDimitry Andric static bool sinkScalarOperands(VPlan &Plan) { 105bdd1243dSDimitry Andric auto Iter = vp_depth_first_deep(Plan.getEntry()); 106fe6060f1SDimitry Andric bool Changed = false; 107bdd1243dSDimitry Andric // First, collect the operands of all recipes in replicate blocks as seeds for 108bdd1243dSDimitry Andric // sinking. 1097a6dacacSDimitry Andric SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList; 110bdd1243dSDimitry Andric for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) { 111bdd1243dSDimitry Andric VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock(); 112bdd1243dSDimitry Andric if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2) 113fe6060f1SDimitry Andric continue; 114bdd1243dSDimitry Andric VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]); 115bdd1243dSDimitry Andric if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock()) 116bdd1243dSDimitry Andric continue; 117bdd1243dSDimitry Andric for (auto &Recipe : *VPBB) { 118bdd1243dSDimitry Andric for (VPValue *Op : Recipe.operands()) 1197a6dacacSDimitry Andric if (auto *Def = 1207a6dacacSDimitry Andric dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe())) 121bdd1243dSDimitry Andric WorkList.insert(std::make_pair(VPBB, Def)); 122fe6060f1SDimitry Andric } 123fe6060f1SDimitry Andric } 124fe6060f1SDimitry Andric 125bdd1243dSDimitry Andric bool ScalarVFOnly = Plan.hasScalarVFOnly(); 126bdd1243dSDimitry Andric // Try to sink each replicate or scalar IV steps recipe in the worklist. 127bdd1243dSDimitry Andric for (unsigned I = 0; I != WorkList.size(); ++I) { 128349cc55cSDimitry Andric VPBasicBlock *SinkTo; 1297a6dacacSDimitry Andric VPSingleDefRecipe *SinkCandidate; 130bdd1243dSDimitry Andric std::tie(SinkTo, SinkCandidate) = WorkList[I]; 131bdd1243dSDimitry Andric if (SinkCandidate->getParent() == SinkTo || 132fe6060f1SDimitry Andric SinkCandidate->mayHaveSideEffects() || 133fe6060f1SDimitry Andric SinkCandidate->mayReadOrWriteMemory()) 134fe6060f1SDimitry Andric continue; 135bdd1243dSDimitry Andric if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) { 136bdd1243dSDimitry Andric if (!ScalarVFOnly && RepR->isUniform()) 137bdd1243dSDimitry Andric continue; 138bdd1243dSDimitry Andric } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate)) 139bdd1243dSDimitry Andric continue; 140fe6060f1SDimitry Andric 141349cc55cSDimitry Andric bool NeedsDuplicating = false; 142349cc55cSDimitry Andric // All recipe users of the sink candidate must be in the same block SinkTo 143349cc55cSDimitry Andric // or all users outside of SinkTo must be uniform-after-vectorization ( 144349cc55cSDimitry Andric // i.e., only first lane is used) . In the latter case, we need to duplicate 145bdd1243dSDimitry Andric // SinkCandidate. 
    auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
                            SinkCandidate](VPUser *U) {
      auto *UI = dyn_cast<VPRecipeBase>(U);
      if (!UI)
        return false;
      if (UI->getParent() == SinkTo)
        return true;
      NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
      // We only know how to duplicate VPReplicateRecipes for now.
      return NeedsDuplicating && isa<VPReplicateRecipe>(SinkCandidate);
    };
    if (!all_of(SinkCandidate->users(), CanSinkWithUser))
      continue;

    if (NeedsDuplicating) {
      if (ScalarVFOnly)
        continue;
      Instruction *I = SinkCandidate->getUnderlyingInstr();
      auto *Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true);
      // TODO: add ".cloned" suffix to name of Clone's VPValue.

      Clone->insertBefore(SinkCandidate);
      SinkCandidate->replaceUsesWithIf(Clone, [SinkTo](VPUser &U, unsigned) {
        return cast<VPRecipeBase>(&U)->getParent() != SinkTo;
      });
    }
    SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi());
    for (VPValue *Op : SinkCandidate->operands())
      if (auto *Def =
              dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
        WorkList.insert(std::make_pair(SinkTo, Def));
    Changed = true;
  }
  return Changed;
}

/// If \p R is a region with a VPBranchOnMaskRecipe in the entry block, return
/// the mask.
VPValue *getPredicatedMask(VPRegionBlock *R) {
  auto *EntryBB = dyn_cast<VPBasicBlock>(R->getEntry());
  if (!EntryBB || EntryBB->size() != 1 ||
      !isa<VPBranchOnMaskRecipe>(EntryBB->begin()))
    return nullptr;

  return cast<VPBranchOnMaskRecipe>(&*EntryBB->begin())->getOperand(0);
}

/// If \p R is a triangle region, return the 'then' block of the triangle.
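/// A triangle region here is one whose entry block has exactly two successors,
/// one of which branches directly to the other; return nullptr if \p R does
/// not have that shape.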
static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) {
  auto *EntryBB = cast<VPBasicBlock>(R->getEntry());
  if (EntryBB->getNumSuccessors() != 2)
    return nullptr;

  auto *Succ0 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[0]);
  auto *Succ1 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[1]);
  if (!Succ0 || !Succ1)
    return nullptr;

  if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
    return nullptr;
  if (Succ0->getSingleSuccessor() == Succ1)
    return Succ0;
  if (Succ1->getSingleSuccessor() == Succ0)
    return Succ1;
  return nullptr;
}

// Merge replicate regions into their successor region, if a replicate region
// is connected to a successor replicate region with the same predicate by a
// single, empty VPBasicBlock.
static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
  SetVector<VPRegionBlock *> DeletedRegions;

  // Collect replicate regions followed by an empty block, followed by another
  // replicate region with matching masks, so they can be processed up front.
  // This is to avoid iterator invalidation issues while merging regions.
  SmallVector<VPRegionBlock *, 8> WorkList;
  for (VPRegionBlock *Region1 : VPBlockUtils::blocksOnly<VPRegionBlock>(
           vp_depth_first_deep(Plan.getEntry()))) {
    if (!Region1->isReplicator())
      continue;
    auto *MiddleBasicBlock =
        dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor());
    if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
      continue;

    auto *Region2 =
        dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
    if (!Region2 || !Region2->isReplicator())
      continue;

    VPValue *Mask1 = getPredicatedMask(Region1);
    VPValue *Mask2 = getPredicatedMask(Region2);
    if (!Mask1 || Mask1 != Mask2)
      continue;

    assert(Mask1 && Mask2 && "both regions must have conditions");
    WorkList.push_back(Region1);
  }

  // Move recipes from Region1 to its successor region, if both are triangles.
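  // The chain Region1 -> empty middle block -> Region2 and the matching masks
  // were already established when the worklist was built above.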
  for (VPRegionBlock *Region1 : WorkList) {
    if (DeletedRegions.contains(Region1))
      continue;
    auto *MiddleBasicBlock = cast<VPBasicBlock>(Region1->getSingleSuccessor());
    auto *Region2 = cast<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());

    VPBasicBlock *Then1 = getPredicatedThenBlock(Region1);
    VPBasicBlock *Then2 = getPredicatedThenBlock(Region2);
    if (!Then1 || !Then2)
      continue;

    // Note: No fusion-preventing memory dependencies are expected in either
    // region. Such dependencies should be rejected during earlier dependence
    // checks, which guarantee accesses can be re-ordered for vectorization.
    //
    // Move recipes to the successor region.
    for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1)))
      ToMove.moveBefore(*Then2, Then2->getFirstNonPhi());

    auto *Merge1 = cast<VPBasicBlock>(Then1->getSingleSuccessor());
    auto *Merge2 = cast<VPBasicBlock>(Then2->getSingleSuccessor());

    // Move VPPredInstPHIRecipes from the merge block to the successor region's
    // merge block. Update all users inside the successor region to use the
    // original values.
    for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) {
      VPValue *PredInst1 =
          cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
      VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
      Phi1ToMoveV->replaceUsesWithIf(PredInst1, [Then2](VPUser &U, unsigned) {
        auto *UI = dyn_cast<VPRecipeBase>(&U);
        return UI && UI->getParent() == Then2;
      });

      // Remove phi recipes that are unused after merging the regions.
      if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
        Phi1ToMove.eraseFromParent();
        continue;
      }
      Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
    }

    // Finally, remove the first region.
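    // Rewire all of Region1's predecessors to the (now empty) middle block;
    // the region object itself is deleted after this loop, once every
    // candidate has been processed.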
    for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) {
      VPBlockUtils::disconnectBlocks(Pred, Region1);
      VPBlockUtils::connectBlocks(Pred, MiddleBasicBlock);
    }
    VPBlockUtils::disconnectBlocks(Region1, MiddleBasicBlock);
    DeletedRegions.insert(Region1);
  }

  for (VPRegionBlock *ToDelete : DeletedRegions)
    delete ToDelete;
  return !DeletedRegions.empty();
}

static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
                                            VPlan &Plan) {
  Instruction *Instr = PredRecipe->getUnderlyingInstr();
  // Build the triangular if-then region.
  std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
  assert(Instr->getParent() && "Predicated instruction not in any basic block");
  auto *BlockInMask = PredRecipe->getMask();
  auto *BOMRecipe = new VPBranchOnMaskRecipe(BlockInMask);
  auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe);

  // Replace predicated replicate recipe with a replicate recipe without a
  // mask but in the replicate region.
  auto *RecipeWithoutMask = new VPReplicateRecipe(
      PredRecipe->getUnderlyingInstr(),
      make_range(PredRecipe->op_begin(), std::prev(PredRecipe->op_end())),
      PredRecipe->isUniform());
  auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask);

  VPPredInstPHIRecipe *PHIRecipe = nullptr;
  if (PredRecipe->getNumUsers() != 0) {
    PHIRecipe = new VPPredInstPHIRecipe(RecipeWithoutMask);
    PredRecipe->replaceAllUsesWith(PHIRecipe);
    PHIRecipe->setOperand(0, RecipeWithoutMask);
  }
  PredRecipe->eraseFromParent();
  auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
  VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true);

  // Note: first set Entry as region entry and then connect successors starting
  // from it in order, to propagate the "parent" of each VPBasicBlock.
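  // The connected blocks form a triangle: Entry branches to both Pred (the
  // ".if" block) and Exiting (the ".continue" block), and Pred falls through
  // to Exiting.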
  VPBlockUtils::insertTwoBlocksAfter(Pred, Exiting, Entry);
  VPBlockUtils::connectBlocks(Pred, Exiting);

  return Region;
}

static void addReplicateRegions(VPlan &Plan) {
  SmallVector<VPReplicateRecipe *> WorkList;
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getEntry()))) {
    for (VPRecipeBase &R : *VPBB)
      if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
        if (RepR->isPredicated())
          WorkList.push_back(RepR);
      }
  }

  unsigned BBNum = 0;
  for (VPReplicateRecipe *RepR : WorkList) {
    VPBasicBlock *CurrentBlock = RepR->getParent();
    VPBasicBlock *SplitBlock = CurrentBlock->splitAt(RepR->getIterator());

    BasicBlock *OrigBB = RepR->getUnderlyingInstr()->getParent();
    SplitBlock->setName(
        OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : "");
    // Record predicated instructions for above packing optimizations.
    VPBlockBase *Region = createReplicateRegion(RepR, Plan);
    Region->setParent(CurrentBlock->getParent());
    VPBlockUtils::disconnectBlocks(CurrentBlock, SplitBlock);
    VPBlockUtils::connectBlocks(CurrentBlock, Region);
    VPBlockUtils::connectBlocks(Region, SplitBlock);
  }
}

/// Remove redundant VPBasicBlocks by merging them into their predecessor if
/// the predecessor has a single successor.
static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
  SmallVector<VPBasicBlock *> WorkList;
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getEntry()))) {
    // Don't fold the exit block of the Plan into its single predecessor for
    // now.
    // TODO: Remove restriction once more of the skeleton is modeled in VPlan.
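    // A block with no successors that is not nested inside a region is the
    // plan's exit block, which the check below identifies.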
    if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent())
      continue;
    auto *PredVPBB =
        dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());
    if (!PredVPBB || PredVPBB->getNumSuccessors() != 1)
      continue;
    WorkList.push_back(VPBB);
  }

  for (VPBasicBlock *VPBB : WorkList) {
    VPBasicBlock *PredVPBB = cast<VPBasicBlock>(VPBB->getSinglePredecessor());
    for (VPRecipeBase &R : make_early_inc_range(*VPBB))
      R.moveBefore(*PredVPBB, PredVPBB->end());
    VPBlockUtils::disconnectBlocks(PredVPBB, VPBB);
    auto *ParentRegion = cast_or_null<VPRegionBlock>(VPBB->getParent());
    if (ParentRegion && ParentRegion->getExiting() == VPBB)
      ParentRegion->setExiting(PredVPBB);
    for (auto *Succ : to_vector(VPBB->successors())) {
      VPBlockUtils::disconnectBlocks(VPBB, Succ);
      VPBlockUtils::connectBlocks(PredVPBB, Succ);
    }
    delete VPBB;
  }
  return !WorkList.empty();
}

void VPlanTransforms::createAndOptimizeReplicateRegions(VPlan &Plan) {
  // Convert masked VPReplicateRecipes to if-then region blocks.
  addReplicateRegions(Plan);

  bool ShouldSimplify = true;
  while (ShouldSimplify) {
    ShouldSimplify = sinkScalarOperands(Plan);
    ShouldSimplify |= mergeReplicateRegionsIntoSuccessors(Plan);
    ShouldSimplify |= mergeBlocksIntoPredecessors(Plan);
  }
}

/// Remove redundant casts of inductions.
///
/// Such redundant casts are casts of induction variables that can be ignored,
/// because we already proved that the casted phi is equal to the uncasted phi
/// in the vectorized loop. There is no need to vectorize the cast - the same
/// value can be used for both the phi and casts in the vector loop.
static void removeRedundantInductionCasts(VPlan &Plan) {
  for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
    auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    if (!IV || IV->getTruncInst())
      continue;

    // A sequence of IR Casts has potentially been recorded for IV, which
    // *must be bypassed* when the IV is vectorized, because the vectorized IV
    // will produce the desired casted value.
    // This sequence forms a def-use chain and is provided in reverse order,
    // ending with the cast that uses the IV phi. Search for the recipe of the
    // last cast in the chain and replace it with the original IV. Note that
    // only the final cast is expected to have users outside the cast-chain and
    // the dead casts left over will be cleaned up later.
    auto &Casts = IV->getInductionDescriptor().getCastInsts();
    VPValue *FindMyCast = IV;
    for (Instruction *IRCast : reverse(Casts)) {
      VPSingleDefRecipe *FoundUserCast = nullptr;
      for (auto *U : FindMyCast->users()) {
        auto *UserCast = dyn_cast<VPSingleDefRecipe>(U);
        if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
          FoundUserCast = UserCast;
          break;
        }
      }
      FindMyCast = FoundUserCast;
    }
    FindMyCast->replaceAllUsesWith(IV);
  }
}

/// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV
/// recipe, if it exists.
static void removeRedundantCanonicalIVs(VPlan &Plan) {
  VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
  VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
  for (VPUser *U : CanonicalIV->users()) {
    WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
    if (WidenNewIV)
      break;
  }

  if (!WidenNewIV)
    return;

  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
  for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
    auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);

    if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
      continue;

    // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides
    // everything WidenNewIV's users need. That is, WidenOriginalIV will
    // generate a vector phi or all users of WidenNewIV demand the first lane
    // only.
    if (any_of(WidenOriginalIV->users(),
               [WidenOriginalIV](VPUser *U) {
                 return !U->usesScalars(WidenOriginalIV);
               }) ||
        vputils::onlyFirstLaneUsed(WidenNewIV)) {
      WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
      WidenNewIV->eraseFromParent();
      return;
    }
  }
}

/// Returns true if \p R is dead and can be removed.
static bool isDeadRecipe(VPRecipeBase &R) {
  using namespace llvm::PatternMatch;
  // Do remove conditional assume instructions as their conditions may be
  // flattened.
  auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
  bool IsConditionalAssume =
      RepR && RepR->isPredicated() &&
      match(RepR->getUnderlyingInstr(), m_Intrinsic<Intrinsic::assume>());
  if (IsConditionalAssume)
    return true;

  if (R.mayHaveSideEffects())
    return false;

  // Recipe is dead if no user keeps the recipe alive.
  return all_of(R.definedValues(),
                [](VPValue *V) { return V->getNumUsers() == 0; });
}

static void removeDeadRecipes(VPlan &Plan) {
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      Plan.getEntry());

  for (VPBasicBlock *VPBB :
       reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) {
    // The recipes in the block are processed in reverse order, to catch chains
    // of dead recipes.
    for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
      if (isDeadRecipe(R))
        R.eraseFromParent();
    }
  }
}

static VPScalarIVStepsRecipe *
createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
                    Instruction::BinaryOps InductionOpcode,
                    FPMathOperator *FPBinOp, ScalarEvolution &SE,
                    Instruction *TruncI, VPValue *StartV, VPValue *Step,
                    VPBasicBlock::iterator IP) {
  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
  VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
  VPSingleDefRecipe *BaseIV = CanonicalIV;
  if (!CanonicalIV->isCanonical(Kind, StartV, Step)) {
    BaseIV = new VPDerivedIVRecipe(Kind, FPBinOp, StartV, CanonicalIV, Step);
    HeaderVPBB->insert(BaseIV, IP);
  }

  // Truncate base induction if needed.
  VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(),
                          SE.getContext());
  Type *ResultTy = TypeInfo.inferScalarType(BaseIV);
  if (TruncI) {
    Type *TruncTy = TruncI->getType();
    assert(ResultTy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits() &&
           "Not truncating.");
    assert(ResultTy->isIntegerTy() && "Truncation requires an integer type");
    BaseIV = new VPScalarCastRecipe(Instruction::Trunc, BaseIV, TruncTy);
    HeaderVPBB->insert(BaseIV, IP);
    ResultTy = TruncTy;
  }

  // Truncate step if needed.
  Type *StepTy = TypeInfo.inferScalarType(Step);
  if (ResultTy != StepTy) {
    assert(StepTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits() &&
           "Not truncating.");
    assert(StepTy->isIntegerTy() && "Truncation requires an integer type");
    Step = new VPScalarCastRecipe(Instruction::Trunc, Step, ResultTy);
    auto *VecPreheader =
        cast<VPBasicBlock>(HeaderVPBB->getSingleHierarchicalPredecessor());
    VecPreheader->appendRecipe(Step->getDefiningRecipe());
  }

  VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(
      BaseIV, Step, InductionOpcode,
      FPBinOp ?
          FPBinOp->getFastMathFlags() : FastMathFlags());
  HeaderVPBB->insert(Steps, IP);
  return Steps;
}

/// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd
/// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as
/// VPWidenPointerInductionRecipe will generate vectors only. If some users
/// require vectors while others require scalars, the scalar uses need to
/// extract the scalars from the generated vectors (note that this is different
/// from how int/fp inductions are handled). Also optimize
/// VPWidenIntOrFpInductionRecipe, if any of its users needs scalar values, by
/// providing them scalar steps built on the canonical scalar IV and updating
/// the original IV's users. This is an optional optimization to reduce the
/// need for vector extracts.
static void legalizeAndOptimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
  SmallVector<VPRecipeBase *> ToRemove;
  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
  bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
  VPBasicBlock::iterator InsertPt = HeaderVPBB->getFirstNonPhi();
  for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
    // Replace wide pointer inductions which have only their scalars used by
    // PtrAdd(IndStart, ScalarIVSteps (0, Step)).
    if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
      if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF()))
        continue;

      const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
      VPValue *StartV =
          Plan.getOrAddLiveIn(ConstantInt::get(ID.getStep()->getType(), 0));
      VPValue *StepV = PtrIV->getOperand(1);
      VPScalarIVStepsRecipe *Steps = createScalarIVSteps(
          Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr,
          SE, nullptr, StartV, StepV, InsertPt);

      auto *Recipe = new VPInstruction(VPInstruction::PtrAdd,
                                       {PtrIV->getStartValue(), Steps},
                                       PtrIV->getDebugLoc(), "next.gep");

      Recipe->insertAfter(Steps);
      PtrIV->replaceAllUsesWith(Recipe);
      continue;
    }

    // Replace widened induction with scalar steps for users that only use
    // scalars.
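    // With vector VFs only the scalar uses are redirected to the steps recipe
    // created below and the widened recipe is kept for the remaining vector
    // users; with a scalar-only VF all uses are replaced.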
    auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    if (!WideIV)
      continue;
    if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
          return U->usesScalars(WideIV);
        }))
      continue;

    const InductionDescriptor &ID = WideIV->getInductionDescriptor();
    VPScalarIVStepsRecipe *Steps = createScalarIVSteps(
        Plan, ID.getKind(), ID.getInductionOpcode(),
        dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()), SE,
        WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
        InsertPt);

    // Update scalar users of IV to use Step instead.
    if (!HasOnlyVectorVFs)
      WideIV->replaceAllUsesWith(Steps);
    else
      WideIV->replaceUsesWithIf(Steps, [WideIV](VPUser &U, unsigned) {
        return U.usesScalars(WideIV);
      });
  }
}

/// Remove redundant ExpandSCEVRecipes in \p Plan's entry block by replacing
/// them with already existing recipes expanding the same SCEV expression.
static void removeRedundantExpandSCEVRecipes(VPlan &Plan) {
  DenseMap<const SCEV *, VPValue *> SCEV2VPV;

  for (VPRecipeBase &R :
       make_early_inc_range(*Plan.getEntry()->getEntryBasicBlock())) {
    auto *ExpR = dyn_cast<VPExpandSCEVRecipe>(&R);
    if (!ExpR)
      continue;

    auto I = SCEV2VPV.insert({ExpR->getSCEV(), ExpR});
    if (I.second)
      continue;
    ExpR->replaceAllUsesWith(I.first->second);
    ExpR->eraseFromParent();
  }
}

static void recursivelyDeleteDeadRecipes(VPValue *V) {
  SmallVector<VPValue *> WorkList;
  SmallPtrSet<VPValue *, 8> Seen;
  WorkList.push_back(V);

  while (!WorkList.empty()) {
    VPValue *Cur = WorkList.pop_back_val();
    if (!Seen.insert(Cur).second)
      continue;
    VPRecipeBase *R = Cur->getDefiningRecipe();
    if (!R)
      continue;
    if (!isDeadRecipe(*R))
      continue;
    WorkList.append(R->op_begin(), R->op_end());
    R->eraseFromParent();
  }
}

void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
                                         unsigned BestUF,
                                         PredicatedScalarEvolution &PSE) {
  assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
  assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
  VPBasicBlock *ExitingVPBB =
      Plan.getVectorLoopRegion()->getExitingBasicBlock();
  auto *Term = &ExitingVPBB->back();
  // Try to simplify the branch condition if TC <= VF * UF when preparing to
  // execute the plan for the main vector loop. We only do this if the
  // terminator is:
  //  1. BranchOnCount, or
  //  2. BranchOnCond where the input is Not(ActiveLaneMask).
  using namespace llvm::VPlanPatternMatch;
  if (!match(Term, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
      !match(Term,
             m_BranchOnCond(m_Not(m_ActiveLaneMask(m_VPValue(), m_VPValue())))))
    return;

  Type *IdxTy =
      Plan.getCanonicalIV()->getStartValue()->getLiveInIRValue()->getType();
  const SCEV *TripCount = createTripCountSCEV(IdxTy, PSE);
  ScalarEvolution &SE = *PSE.getSE();
  ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF);
  const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
  if (TripCount->isZero() ||
      !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C))
    return;

  LLVMContext &Ctx = SE.getContext();
  auto *BOC =
      new VPInstruction(VPInstruction::BranchOnCond,
                        {Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))});

  SmallVector<VPValue *> PossiblyDead(Term->operands());
  Term->eraseFromParent();
  for (VPValue *Op : PossiblyDead)
    recursivelyDeleteDeadRecipes(Op);
  ExitingVPBB->appendRecipe(BOC);
  Plan.setVF(BestVF);
  Plan.setUF(BestUF);
  // TODO: Further simplifications are possible
  //       1. Replace inductions with constants.
  //       2. Replace vector loop region with VPBasicBlock.
}

#ifndef NDEBUG
static VPRegionBlock *GetReplicateRegion(VPRecipeBase *R) {
  auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
  if (Region && Region->isReplicator()) {
    assert(Region->getNumSuccessors() == 1 &&
           Region->getNumPredecessors() == 1 && "Expected SESE region!");
    assert(R->getParent()->size() == 1 &&
           "A recipe in an original replicator region must be the only "
           "recipe in its block");
    return Region;
  }
  return nullptr;
}
#endif

static bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B,
                              VPDominatorTree &VPDT) {
  if (A == B)
    return false;

  auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
    for (auto &R : *A->getParent()) {
      if (&R == A)
        return true;
      if (&R == B)
        return false;
    }
    llvm_unreachable("recipe not found");
  };
  const VPBlockBase *ParentA = A->getParent();
  const VPBlockBase *ParentB = B->getParent();
  if (ParentA == ParentB)
    return LocalComesBefore(A, B);

  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
         "No replicate regions expected at this point");
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
         "No replicate regions expected at this point");
  return VPDT.properlyDominates(ParentA, ParentB);
}

/// Sink users of \p FOR after the recipe defining the previous value \p
/// Previous of the recurrence. \returns true if all users of \p FOR could be
/// re-arranged as needed or false if it is not possible.
static bool
sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
                                 VPRecipeBase *Previous,
                                 VPDominatorTree &VPDT) {
  // Collect recipes that need sinking.
  SmallVector<VPRecipeBase *> WorkList;
  SmallPtrSet<VPRecipeBase *, 8> Seen;
  Seen.insert(Previous);
  auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
    // The previous value must not depend on the users of the recurrence phi;
    // if it did, FOR would not be a fixed-order recurrence.
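    // Encountering Previous while walking the transitive users of FOR means
    // exactly such a dependence exists, so sinking has to be abandoned.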
    if (SinkCandidate == Previous)
      return false;

    if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
        !Seen.insert(SinkCandidate).second ||
        properlyDominates(Previous, SinkCandidate, VPDT))
      return true;

    if (SinkCandidate->mayHaveSideEffects())
      return false;

    WorkList.push_back(SinkCandidate);
    return true;
  };

  // Recursively sink users of FOR after Previous.
  WorkList.push_back(FOR);
  for (unsigned I = 0; I != WorkList.size(); ++I) {
    VPRecipeBase *Current = WorkList[I];
    assert(Current->getNumDefinedValues() == 1 &&
           "only recipes with a single defined value expected");

    for (VPUser *User : Current->getVPSingleValue()->users()) {
      if (auto *R = dyn_cast<VPRecipeBase>(User))
        if (!TryToPushSinkCandidate(R))
          return false;
    }
  }

  // Keep recipes to sink ordered by dominance so earlier instructions are
  // processed first.
  sort(WorkList, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
    return properlyDominates(A, B, VPDT);
  });

  for (VPRecipeBase *SinkCandidate : WorkList) {
    if (SinkCandidate == FOR)
      continue;

    SinkCandidate->moveAfter(Previous);
    Previous = SinkCandidate;
  }
  return true;
}

bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
                                                  VPBuilder &LoopBuilder) {
  VPDominatorTree VPDT;
  VPDT.recalculate(Plan);

  SmallVector<VPFirstOrderRecurrencePHIRecipe *> RecurrencePhis;
  for (VPRecipeBase &R :
       Plan.getVectorLoopRegion()->getEntry()->getEntryBasicBlock()->phis())
    if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
      RecurrencePhis.push_back(FOR);

  VPBasicBlock *MiddleVPBB =
      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
  VPBuilder MiddleBuilder;
  // Set the insert point so that new recipes are inserted before the
  // terminator and, if present, before the compare feeding its condition.
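  // If the middle block has no VPInstruction terminator, new recipes are
  // simply appended at the end of the block.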
  if (auto *Term =
          dyn_cast_or_null<VPInstruction>(MiddleVPBB->getTerminator())) {
    if (auto *Cmp = dyn_cast<VPInstruction>(Term->getOperand(0)))
      MiddleBuilder.setInsertPoint(Cmp);
    else
      MiddleBuilder.setInsertPoint(Term);
  } else
    MiddleBuilder.setInsertPoint(MiddleVPBB);

  for (VPFirstOrderRecurrencePHIRecipe *FOR : RecurrencePhis) {
    SmallPtrSet<VPFirstOrderRecurrencePHIRecipe *, 4> SeenPhis;
    VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
    // Fixed-order recurrences do not contain cycles, so this loop is guaranteed
    // to terminate.
    while (auto *PrevPhi =
               dyn_cast_or_null<VPFirstOrderRecurrencePHIRecipe>(Previous)) {
      assert(PrevPhi->getParent() == FOR->getParent());
      assert(SeenPhis.insert(PrevPhi).second);
      Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
    }

    if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT))
      return false;

    // Introduce a recipe to combine the incoming and previous values of a
    // fixed-order recurrence.
    VPBasicBlock *InsertBlock = Previous->getParent();
    if (isa<VPHeaderPHIRecipe>(Previous))
      LoopBuilder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi());
    else
      LoopBuilder.setInsertPoint(InsertBlock,
                                 std::next(Previous->getIterator()));

    auto *RecurSplice = cast<VPInstruction>(
        LoopBuilder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice,
                                 {FOR, FOR->getBackedgeValue()}));

    FOR->replaceAllUsesWith(RecurSplice);
    // Set the first operand of RecurSplice to FOR again, after replacing
    // all users.
    RecurSplice->setOperand(0, FOR);

    // This is the second phase of vectorizing first-order recurrences. An
    // overview of the transformation is described below. Suppose we have the
    // following loop with some use after the loop of the last a[i-1],
    //
    //   for (int i = 0; i < n; ++i) {
    //     t = a[i - 1];
    //     b[i] = a[i] - t;
    //   }
    //   use t;
    //
    // There is a first-order recurrence on "a".
    // For this loop, the shorthand scalar IR looks like:
    //
    //   scalar.ph:
    //     s_init = a[-1]
    //     br scalar.body
    //
    //   scalar.body:
    //     i = phi [0, scalar.ph], [i+1, scalar.body]
    //     s1 = phi [s_init, scalar.ph], [s2, scalar.body]
    //     s2 = a[i]
    //     b[i] = s2 - s1
    //     br cond, scalar.body, exit.block
    //
    //   exit.block:
    //     use = lcssa.phi [s1, scalar.body]
    //
    // In this example, s1 is a recurrence because its value depends on the
    // previous iteration. In the first phase of vectorization, we created a
    // vector phi v1 for s1. We now complete the vectorization and produce the
    // shorthand vector IR shown below (for VF = 4, UF = 1).
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body:
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3]
    //     v3 = vector(v1(3), v2(0, 1, 2))
    //     b[i, i+1, i+2, i+3] = v2 - v3
    //     br cond, vector.body, middle.block
    //
    //   middle.block:
    //     s_penultimate = v2(2) = v3(3)
    //     s_resume = v2(3)
    //     br cond, scalar.ph, exit.block
    //
    //   scalar.ph:
    //     s_init' = phi [s_resume, middle.block], [s_init, otherwise]
    //     br scalar.body
    //
    //   scalar.body:
    //     i = phi [0, scalar.ph], [i+1, scalar.body]
    //     s1 = phi [s_init', scalar.ph], [s2, scalar.body]
    //     s2 = a[i]
    //     b[i] = s2 - s1
    //     br cond, scalar.body, exit.block
    //
    //   exit.block:
    //     lo = lcssa.phi [s1, scalar.body], [s.penultimate, middle.block]
    //
    // After the vector loop finishes executing, we extract the next value of
    // the recurrence to use as the initial value in the scalar loop. This is
    // modeled by ExtractFromEnd.
    Type *IntTy = Plan.getCanonicalIV()->getScalarType();

    // Extract the penultimate value of the recurrence and update VPLiveOut
    // users of the recurrence splice.
Note that the extract of the final value 946*0fca6ea1SDimitry Andric // used to resume in the scalar loop is created earlier during VPlan 947*0fca6ea1SDimitry Andric // construction. 948*0fca6ea1SDimitry Andric auto *Penultimate = cast<VPInstruction>(MiddleBuilder.createNaryOp( 949*0fca6ea1SDimitry Andric VPInstruction::ExtractFromEnd, 950*0fca6ea1SDimitry Andric {FOR->getBackedgeValue(), 951*0fca6ea1SDimitry Andric Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 2))}, 952*0fca6ea1SDimitry Andric {}, "vector.recur.extract.for.phi")); 953*0fca6ea1SDimitry Andric RecurSplice->replaceUsesWithIf( 954*0fca6ea1SDimitry Andric Penultimate, [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); }); 95506c3fb27SDimitry Andric } 95606c3fb27SDimitry Andric return true; 95706c3fb27SDimitry Andric } 95806c3fb27SDimitry Andric 959*0fca6ea1SDimitry Andric static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) { 960*0fca6ea1SDimitry Andric SetVector<VPUser *> Users(V->user_begin(), V->user_end()); 961*0fca6ea1SDimitry Andric for (unsigned I = 0; I != Users.size(); ++I) { 962*0fca6ea1SDimitry Andric VPRecipeBase *Cur = dyn_cast<VPRecipeBase>(Users[I]); 963*0fca6ea1SDimitry Andric if (!Cur || isa<VPHeaderPHIRecipe>(Cur)) 964*0fca6ea1SDimitry Andric continue; 965*0fca6ea1SDimitry Andric for (VPValue *V : Cur->definedValues()) 966*0fca6ea1SDimitry Andric Users.insert(V->user_begin(), V->user_end()); 967*0fca6ea1SDimitry Andric } 968*0fca6ea1SDimitry Andric return Users.takeVector(); 969*0fca6ea1SDimitry Andric } 970*0fca6ea1SDimitry Andric 97106c3fb27SDimitry Andric void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) { 97206c3fb27SDimitry Andric for (VPRecipeBase &R : 97306c3fb27SDimitry Andric Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { 97406c3fb27SDimitry Andric auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R); 97506c3fb27SDimitry Andric if (!PhiR) 97606c3fb27SDimitry Andric continue; 97706c3fb27SDimitry Andric const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); 97806c3fb27SDimitry Andric RecurKind RK = RdxDesc.getRecurrenceKind(); 97906c3fb27SDimitry Andric if (RK != RecurKind::Add && RK != RecurKind::Mul) 98006c3fb27SDimitry Andric continue; 98106c3fb27SDimitry Andric 982*0fca6ea1SDimitry Andric for (VPUser *U : collectUsersRecursively(PhiR)) 983*0fca6ea1SDimitry Andric if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(U)) { 98406c3fb27SDimitry Andric RecWithFlags->dropPoisonGeneratingFlags(); 98506c3fb27SDimitry Andric } 98606c3fb27SDimitry Andric } 98706c3fb27SDimitry Andric } 9885f757f3fSDimitry Andric 9895f757f3fSDimitry Andric /// Try to simplify recipe \p R. 9905f757f3fSDimitry Andric static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { 991*0fca6ea1SDimitry Andric using namespace llvm::VPlanPatternMatch; 992*0fca6ea1SDimitry Andric // Try to remove redundant blend recipes. 
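    // For example (shorthand only, not exact recipe syntax): a blend of the
    // form
    //   BLEND %x, %x/%m1, %y/false
    // where every other incoming value either equals the first incoming value
    // or is guarded by a known-false mask can be replaced by %x.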
993*0fca6ea1SDimitry Andric   if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
994*0fca6ea1SDimitry Andric     VPValue *Inc0 = Blend->getIncomingValue(0);
995*0fca6ea1SDimitry Andric     for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
996*0fca6ea1SDimitry Andric       if (Inc0 != Blend->getIncomingValue(I) &&
997*0fca6ea1SDimitry Andric           !match(Blend->getMask(I), m_False()))
998*0fca6ea1SDimitry Andric         return;
999*0fca6ea1SDimitry Andric     Blend->replaceAllUsesWith(Inc0);
1000*0fca6ea1SDimitry Andric     Blend->eraseFromParent();
1001*0fca6ea1SDimitry Andric     return;
10025f757f3fSDimitry Andric   }
1003*0fca6ea1SDimitry Andric 
1004*0fca6ea1SDimitry Andric   VPValue *A;
1005*0fca6ea1SDimitry Andric   if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
10065f757f3fSDimitry Andric     VPValue *Trunc = R.getVPSingleValue();
10075f757f3fSDimitry Andric     Type *TruncTy = TypeInfo.inferScalarType(Trunc);
10085f757f3fSDimitry Andric     Type *ATy = TypeInfo.inferScalarType(A);
10095f757f3fSDimitry Andric     if (TruncTy == ATy) {
10105f757f3fSDimitry Andric       Trunc->replaceAllUsesWith(A);
10111db9f3b2SDimitry Andric     } else {
10121db9f3b2SDimitry Andric       // Don't replace a scalarizing recipe with a widened cast.
10131db9f3b2SDimitry Andric       if (isa<VPReplicateRecipe>(&R))
1014*0fca6ea1SDimitry Andric         return;
10151db9f3b2SDimitry Andric       if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
1016*0fca6ea1SDimitry Andric 
1017*0fca6ea1SDimitry Andric         unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
1018*0fca6ea1SDimitry Andric                                  ? Instruction::SExt
1019*0fca6ea1SDimitry Andric                                  : Instruction::ZExt;
10205f757f3fSDimitry Andric         auto *VPC =
10215f757f3fSDimitry Andric             new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
1022*0fca6ea1SDimitry Andric         if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
1023*0fca6ea1SDimitry Andric           // UnderlyingExt has distinct return type, used to retain legacy cost.
1024*0fca6ea1SDimitry Andric           VPC->setUnderlyingValue(UnderlyingExt);
1025*0fca6ea1SDimitry Andric         }
10265f757f3fSDimitry Andric         VPC->insertBefore(&R);
10275f757f3fSDimitry Andric         Trunc->replaceAllUsesWith(VPC);
10285f757f3fSDimitry Andric       } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
10295f757f3fSDimitry Andric         auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
10305f757f3fSDimitry Andric         VPC->insertBefore(&R);
10315f757f3fSDimitry Andric         Trunc->replaceAllUsesWith(VPC);
10325f757f3fSDimitry Andric       }
10331db9f3b2SDimitry Andric     }
10345f757f3fSDimitry Andric #ifndef NDEBUG
10355f757f3fSDimitry Andric     // Verify that the cached type info for both A and its users is still
10365f757f3fSDimitry Andric     // accurate by comparing it to freshly computed types.
1037*0fca6ea1SDimitry Andric     VPTypeAnalysis TypeInfo2(
1038*0fca6ea1SDimitry Andric         R.getParent()->getPlan()->getCanonicalIV()->getScalarType(),
1039*0fca6ea1SDimitry Andric         TypeInfo.getContext());
10405f757f3fSDimitry Andric     assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
10415f757f3fSDimitry Andric     for (VPUser *U : A->users()) {
10425f757f3fSDimitry Andric       auto *R = dyn_cast<VPRecipeBase>(U);
10435f757f3fSDimitry Andric       if (!R)
10445f757f3fSDimitry Andric         continue;
10455f757f3fSDimitry Andric       for (VPValue *VPV : R->definedValues())
10465f757f3fSDimitry Andric         assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
10475f757f3fSDimitry Andric     }
10485f757f3fSDimitry Andric #endif
10495f757f3fSDimitry Andric   }
1050*0fca6ea1SDimitry Andric 
1051*0fca6ea1SDimitry Andric   // Simplify (X && Y) || (X && !Y) -> X.
1052*0fca6ea1SDimitry Andric   // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
1053*0fca6ea1SDimitry Andric   // && (Y || Z) and (X || !X) into true. This requires queuing newly created
1054*0fca6ea1SDimitry Andric   // recipes to be visited during simplification.
1055*0fca6ea1SDimitry Andric   VPValue *X, *Y, *X1, *Y1;
1056*0fca6ea1SDimitry Andric   if (match(&R,
1057*0fca6ea1SDimitry Andric             m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
1058*0fca6ea1SDimitry Andric                          m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
1059*0fca6ea1SDimitry Andric       X == X1 && Y == Y1) {
1060*0fca6ea1SDimitry Andric     R.getVPSingleValue()->replaceAllUsesWith(X);
1061*0fca6ea1SDimitry Andric     return;
10625f757f3fSDimitry Andric   }
1063*0fca6ea1SDimitry Andric 
1064*0fca6ea1SDimitry Andric   if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
1065*0fca6ea1SDimitry Andric     return R.getVPSingleValue()->replaceAllUsesWith(A);
10665f757f3fSDimitry Andric }
10675f757f3fSDimitry Andric 
10685f757f3fSDimitry Andric /// Try to simplify the recipes in \p Plan.
10695f757f3fSDimitry Andric static void simplifyRecipes(VPlan &Plan, LLVMContext &Ctx) {
10705f757f3fSDimitry Andric   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
10715f757f3fSDimitry Andric       Plan.getEntry());
1072*0fca6ea1SDimitry Andric   VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(), Ctx);
10735f757f3fSDimitry Andric   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
10745f757f3fSDimitry Andric     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
10755f757f3fSDimitry Andric       simplifyRecipe(R, TypeInfo);
10765f757f3fSDimitry Andric     }
10775f757f3fSDimitry Andric   }
10785f757f3fSDimitry Andric }
10795f757f3fSDimitry Andric 
10805f757f3fSDimitry Andric void VPlanTransforms::truncateToMinimalBitwidths(
10815f757f3fSDimitry Andric     VPlan &Plan, const MapVector<Instruction *, uint64_t> &MinBWs,
10825f757f3fSDimitry Andric     LLVMContext &Ctx) {
10835f757f3fSDimitry Andric #ifndef NDEBUG
10845f757f3fSDimitry Andric   // Count the processed recipes and cross check the count later with MinBWs
10855f757f3fSDimitry Andric   // size, to make sure all entries in MinBWs have been handled.
10865f757f3fSDimitry Andric   unsigned NumProcessedRecipes = 0;
10875f757f3fSDimitry Andric #endif
10885f757f3fSDimitry Andric   // Keep track of created truncates, so they can be re-used. Note that we
10895f757f3fSDimitry Andric   // cannot use RAUW after creating a new truncate, as this could make
10905f757f3fSDimitry Andric   // other uses have different types for their operands, making them invalidly
10915f757f3fSDimitry Andric   // typed.
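  // For illustration (shorthand, assuming MinBWs records that an i32 add is
  // only live in its low 8 bits):
  //   %r = add i32 %a, %b
  // is rewritten roughly as
  //   %a.t = trunc i32 %a to i8
  //   %b.t = trunc i32 %b to i8
  //   %r.n = add i8 %a.t, %b.t
  //   %r   = zext i8 %r.n to i32
  // with the created truncates re-used via the map below.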
10925f757f3fSDimitry Andric   DenseMap<VPValue *, VPWidenCastRecipe *> ProcessedTruncs;
1093*0fca6ea1SDimitry Andric   VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(), Ctx);
10945f757f3fSDimitry Andric   VPBasicBlock *PH = Plan.getEntry();
10955f757f3fSDimitry Andric   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
10965f757f3fSDimitry Andric            vp_depth_first_deep(Plan.getVectorLoopRegion()))) {
10975f757f3fSDimitry Andric     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
10985f757f3fSDimitry Andric       if (!isa<VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe,
1099*0fca6ea1SDimitry Andric                VPWidenSelectRecipe, VPWidenLoadRecipe>(&R))
11005f757f3fSDimitry Andric         continue;
11015f757f3fSDimitry Andric 
11025f757f3fSDimitry Andric       VPValue *ResultVPV = R.getVPSingleValue();
11035f757f3fSDimitry Andric       auto *UI = cast_or_null<Instruction>(ResultVPV->getUnderlyingValue());
11045f757f3fSDimitry Andric       unsigned NewResSizeInBits = MinBWs.lookup(UI);
11055f757f3fSDimitry Andric       if (!NewResSizeInBits)
11065f757f3fSDimitry Andric         continue;
11075f757f3fSDimitry Andric 
11085f757f3fSDimitry Andric #ifndef NDEBUG
11095f757f3fSDimitry Andric       NumProcessedRecipes++;
11105f757f3fSDimitry Andric #endif
11115f757f3fSDimitry Andric       // If the value wasn't vectorized, we must maintain the original scalar
11125f757f3fSDimitry Andric       // type. Skip those here, after incrementing NumProcessedRecipes. Also
11135f757f3fSDimitry Andric       // skip casts which do not need to be handled explicitly here, as
11145f757f3fSDimitry Andric       // redundant casts will be removed during recipe simplification.
11155f757f3fSDimitry Andric       if (isa<VPReplicateRecipe, VPWidenCastRecipe>(&R)) {
11165f757f3fSDimitry Andric #ifndef NDEBUG
11175f757f3fSDimitry Andric         // If any of the operands is a live-in and not used by VPWidenRecipe or
11185f757f3fSDimitry Andric         // VPWidenSelectRecipe, but in MinBWs, make sure it is counted as
11195f757f3fSDimitry Andric         // processed as well. When MinBWs is currently constructed, there is no
11205f757f3fSDimitry Andric         // information about whether recipes are widened or replicated and in
11215f757f3fSDimitry Andric         // case they are replicated the operands are not truncated. Counting
11225f757f3fSDimitry Andric         // them here ensures we do not miss any recipes in MinBWs.
11235f757f3fSDimitry Andric         // TODO: Remove once the analysis is done on VPlan.
11245f757f3fSDimitry Andric         for (VPValue *Op : R.operands()) {
11255f757f3fSDimitry Andric           if (!Op->isLiveIn())
11265f757f3fSDimitry Andric             continue;
11275f757f3fSDimitry Andric           auto *UV = dyn_cast_or_null<Instruction>(Op->getUnderlyingValue());
11285f757f3fSDimitry Andric           if (UV && MinBWs.contains(UV) && !ProcessedTruncs.contains(Op) &&
11295f757f3fSDimitry Andric               all_of(Op->users(), [](VPUser *U) {
11305f757f3fSDimitry Andric                 return !isa<VPWidenRecipe, VPWidenSelectRecipe>(U);
11315f757f3fSDimitry Andric               })) {
11325f757f3fSDimitry Andric             // Add an entry to ProcessedTruncs to avoid counting the same
11335f757f3fSDimitry Andric             // operand multiple times.
11345f757f3fSDimitry Andric ProcessedTruncs[Op] = nullptr; 11355f757f3fSDimitry Andric NumProcessedRecipes += 1; 11365f757f3fSDimitry Andric } 11375f757f3fSDimitry Andric } 11385f757f3fSDimitry Andric #endif 11395f757f3fSDimitry Andric continue; 11405f757f3fSDimitry Andric } 11415f757f3fSDimitry Andric 11425f757f3fSDimitry Andric Type *OldResTy = TypeInfo.inferScalarType(ResultVPV); 11435f757f3fSDimitry Andric unsigned OldResSizeInBits = OldResTy->getScalarSizeInBits(); 11445f757f3fSDimitry Andric assert(OldResTy->isIntegerTy() && "only integer types supported"); 11455f757f3fSDimitry Andric (void)OldResSizeInBits; 11465f757f3fSDimitry Andric 11475f757f3fSDimitry Andric auto *NewResTy = IntegerType::get(Ctx, NewResSizeInBits); 11485f757f3fSDimitry Andric 11497a6dacacSDimitry Andric // Any wrapping introduced by shrinking this operation shouldn't be 11507a6dacacSDimitry Andric // considered undefined behavior. So, we can't unconditionally copy 11517a6dacacSDimitry Andric // arithmetic wrapping flags to VPW. 11527a6dacacSDimitry Andric if (auto *VPW = dyn_cast<VPRecipeWithIRFlags>(&R)) 11537a6dacacSDimitry Andric VPW->dropPoisonGeneratingFlags(); 11547a6dacacSDimitry Andric 1155*0fca6ea1SDimitry Andric using namespace llvm::VPlanPatternMatch; 1156*0fca6ea1SDimitry Andric if (OldResSizeInBits != NewResSizeInBits && 1157*0fca6ea1SDimitry Andric !match(&R, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue()))) { 11587a6dacacSDimitry Andric // Extend result to original width. 1159*0fca6ea1SDimitry Andric auto *Ext = 1160*0fca6ea1SDimitry Andric new VPWidenCastRecipe(Instruction::ZExt, ResultVPV, OldResTy); 11617a6dacacSDimitry Andric Ext->insertAfter(&R); 11627a6dacacSDimitry Andric ResultVPV->replaceAllUsesWith(Ext); 11637a6dacacSDimitry Andric Ext->setOperand(0, ResultVPV); 1164*0fca6ea1SDimitry Andric assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?"); 1165*0fca6ea1SDimitry Andric } else 1166*0fca6ea1SDimitry Andric assert( 1167*0fca6ea1SDimitry Andric match(&R, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue())) && 1168*0fca6ea1SDimitry Andric "Only ICmps should not need extending the result."); 11697a6dacacSDimitry Andric 1170*0fca6ea1SDimitry Andric assert(!isa<VPWidenStoreRecipe>(&R) && "stores cannot be narrowed"); 1171*0fca6ea1SDimitry Andric if (isa<VPWidenLoadRecipe>(&R)) 11727a6dacacSDimitry Andric continue; 11737a6dacacSDimitry Andric 11745f757f3fSDimitry Andric // Shrink operands by introducing truncates as needed. 11755f757f3fSDimitry Andric unsigned StartIdx = isa<VPWidenSelectRecipe>(&R) ? 1 : 0; 11765f757f3fSDimitry Andric for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) { 11775f757f3fSDimitry Andric auto *Op = R.getOperand(Idx); 11785f757f3fSDimitry Andric unsigned OpSizeInBits = 11795f757f3fSDimitry Andric TypeInfo.inferScalarType(Op)->getScalarSizeInBits(); 11805f757f3fSDimitry Andric if (OpSizeInBits == NewResSizeInBits) 11815f757f3fSDimitry Andric continue; 11825f757f3fSDimitry Andric assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate"); 11835f757f3fSDimitry Andric auto [ProcessedIter, IterIsEmpty] = 11845f757f3fSDimitry Andric ProcessedTruncs.insert({Op, nullptr}); 11855f757f3fSDimitry Andric VPWidenCastRecipe *NewOp = 11865f757f3fSDimitry Andric IterIsEmpty 11875f757f3fSDimitry Andric ? 
new VPWidenCastRecipe(Instruction::Trunc, Op, NewResTy) 11885f757f3fSDimitry Andric : ProcessedIter->second; 11895f757f3fSDimitry Andric R.setOperand(Idx, NewOp); 11905f757f3fSDimitry Andric if (!IterIsEmpty) 11915f757f3fSDimitry Andric continue; 11925f757f3fSDimitry Andric ProcessedIter->second = NewOp; 11935f757f3fSDimitry Andric if (!Op->isLiveIn()) { 11945f757f3fSDimitry Andric NewOp->insertBefore(&R); 11955f757f3fSDimitry Andric } else { 11965f757f3fSDimitry Andric PH->appendRecipe(NewOp); 11975f757f3fSDimitry Andric #ifndef NDEBUG 11985f757f3fSDimitry Andric auto *OpInst = dyn_cast<Instruction>(Op->getLiveInIRValue()); 11995f757f3fSDimitry Andric bool IsContained = MinBWs.contains(OpInst); 12005f757f3fSDimitry Andric NumProcessedRecipes += IsContained; 12015f757f3fSDimitry Andric #endif 12025f757f3fSDimitry Andric } 12035f757f3fSDimitry Andric } 12045f757f3fSDimitry Andric 12055f757f3fSDimitry Andric } 12065f757f3fSDimitry Andric } 12075f757f3fSDimitry Andric 12085f757f3fSDimitry Andric assert(MinBWs.size() == NumProcessedRecipes && 12095f757f3fSDimitry Andric "some entries in MinBWs haven't been processed"); 12105f757f3fSDimitry Andric } 12115f757f3fSDimitry Andric 12125f757f3fSDimitry Andric void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) { 12135f757f3fSDimitry Andric removeRedundantCanonicalIVs(Plan); 12145f757f3fSDimitry Andric removeRedundantInductionCasts(Plan); 12155f757f3fSDimitry Andric 12165f757f3fSDimitry Andric simplifyRecipes(Plan, SE.getContext()); 1217*0fca6ea1SDimitry Andric legalizeAndOptimizeInductions(Plan, SE); 12185f757f3fSDimitry Andric removeDeadRecipes(Plan); 12195f757f3fSDimitry Andric 12205f757f3fSDimitry Andric createAndOptimizeReplicateRegions(Plan); 12215f757f3fSDimitry Andric 12225f757f3fSDimitry Andric removeRedundantExpandSCEVRecipes(Plan); 12235f757f3fSDimitry Andric mergeBlocksIntoPredecessors(Plan); 12245f757f3fSDimitry Andric } 12255f757f3fSDimitry Andric 12265f757f3fSDimitry Andric // Add a VPActiveLaneMaskPHIRecipe and related recipes to \p Plan and replace 12275f757f3fSDimitry Andric // the loop terminator with a branch-on-cond recipe with the negated 12285f757f3fSDimitry Andric // active-lane-mask as operand. Note that this turns the loop into an 12295f757f3fSDimitry Andric // uncountable one. Only the existing terminator is replaced, all other existing 12305f757f3fSDimitry Andric // recipes/users remain unchanged, except for poison-generating flags being 12315f757f3fSDimitry Andric // dropped from the canonical IV increment. Return the created 12325f757f3fSDimitry Andric // VPActiveLaneMaskPHIRecipe. 12335f757f3fSDimitry Andric // 12345f757f3fSDimitry Andric // The function uses the following definitions: 12355f757f3fSDimitry Andric // 12365f757f3fSDimitry Andric // %TripCount = DataWithControlFlowWithoutRuntimeCheck ? 12375f757f3fSDimitry Andric // calculate-trip-count-minus-VF (original TC) : original TC 12385f757f3fSDimitry Andric // %IncrementValue = DataWithControlFlowWithoutRuntimeCheck ? 12395f757f3fSDimitry Andric // CanonicalIVPhi : CanonicalIVIncrement 12405f757f3fSDimitry Andric // %StartV is the canonical induction start value. 
12415f757f3fSDimitry Andric // 12425f757f3fSDimitry Andric // The function adds the following recipes: 12435f757f3fSDimitry Andric // 12445f757f3fSDimitry Andric // vector.ph: 12455f757f3fSDimitry Andric // %TripCount = calculate-trip-count-minus-VF (original TC) 12465f757f3fSDimitry Andric // [if DataWithControlFlowWithoutRuntimeCheck] 12475f757f3fSDimitry Andric // %EntryInc = canonical-iv-increment-for-part %StartV 12485f757f3fSDimitry Andric // %EntryALM = active-lane-mask %EntryInc, %TripCount 12495f757f3fSDimitry Andric // 12505f757f3fSDimitry Andric // vector.body: 12515f757f3fSDimitry Andric // ... 12525f757f3fSDimitry Andric // %P = active-lane-mask-phi [ %EntryALM, %vector.ph ], [ %ALM, %vector.body ] 12535f757f3fSDimitry Andric // ... 12545f757f3fSDimitry Andric // %InLoopInc = canonical-iv-increment-for-part %IncrementValue 12555f757f3fSDimitry Andric // %ALM = active-lane-mask %InLoopInc, TripCount 12565f757f3fSDimitry Andric // %Negated = Not %ALM 12575f757f3fSDimitry Andric // branch-on-cond %Negated 12585f757f3fSDimitry Andric // 12595f757f3fSDimitry Andric static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( 12605f757f3fSDimitry Andric VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) { 12615f757f3fSDimitry Andric VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); 12625f757f3fSDimitry Andric VPBasicBlock *EB = TopRegion->getExitingBasicBlock(); 12635f757f3fSDimitry Andric auto *CanonicalIVPHI = Plan.getCanonicalIV(); 12645f757f3fSDimitry Andric VPValue *StartV = CanonicalIVPHI->getStartValue(); 12655f757f3fSDimitry Andric 12665f757f3fSDimitry Andric auto *CanonicalIVIncrement = 12675f757f3fSDimitry Andric cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue()); 12685f757f3fSDimitry Andric // TODO: Check if dropping the flags is needed if 12695f757f3fSDimitry Andric // !DataAndControlFlowWithoutRuntimeCheck. 12705f757f3fSDimitry Andric CanonicalIVIncrement->dropPoisonGeneratingFlags(); 12715f757f3fSDimitry Andric DebugLoc DL = CanonicalIVIncrement->getDebugLoc(); 12725f757f3fSDimitry Andric // We can't use StartV directly in the ActiveLaneMask VPInstruction, since 12735f757f3fSDimitry Andric // we have to take unrolling into account. Each part needs to start at 12745f757f3fSDimitry Andric // Part * VF 12755f757f3fSDimitry Andric auto *VecPreheader = cast<VPBasicBlock>(TopRegion->getSinglePredecessor()); 12765f757f3fSDimitry Andric VPBuilder Builder(VecPreheader); 12775f757f3fSDimitry Andric 12785f757f3fSDimitry Andric // Create the ActiveLaneMask instruction using the correct start values. 12795f757f3fSDimitry Andric VPValue *TC = Plan.getTripCount(); 12805f757f3fSDimitry Andric 12815f757f3fSDimitry Andric VPValue *TripCount, *IncrementValue; 12825f757f3fSDimitry Andric if (!DataAndControlFlowWithoutRuntimeCheck) { 12835f757f3fSDimitry Andric // When the loop is guarded by a runtime overflow check for the loop 12845f757f3fSDimitry Andric // induction variable increment by VF, we can increment the value before 12855f757f3fSDimitry Andric // the get.active.lane mask and use the unmodified tripcount. 12865f757f3fSDimitry Andric IncrementValue = CanonicalIVIncrement; 12875f757f3fSDimitry Andric TripCount = TC; 12885f757f3fSDimitry Andric } else { 12895f757f3fSDimitry Andric // When avoiding a runtime check, the active.lane.mask inside the loop 12905f757f3fSDimitry Andric // uses a modified trip count and the induction variable increment is 12915f757f3fSDimitry Andric // done after the active.lane.mask intrinsic is called. 
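    // Intuitively, comparing the un-incremented IV against trip-count-minus-VF
    // is equivalent to comparing the incremented IV against the original trip
    // count, but it avoids the increment that might otherwise overflow.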
12925f757f3fSDimitry Andric IncrementValue = CanonicalIVPHI; 12935f757f3fSDimitry Andric TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF, 12945f757f3fSDimitry Andric {TC}, DL); 12955f757f3fSDimitry Andric } 12965f757f3fSDimitry Andric auto *EntryIncrement = Builder.createOverflowingOp( 12975f757f3fSDimitry Andric VPInstruction::CanonicalIVIncrementForPart, {StartV}, {false, false}, DL, 12985f757f3fSDimitry Andric "index.part.next"); 12995f757f3fSDimitry Andric 13005f757f3fSDimitry Andric // Create the active lane mask instruction in the VPlan preheader. 13015f757f3fSDimitry Andric auto *EntryALM = 13025f757f3fSDimitry Andric Builder.createNaryOp(VPInstruction::ActiveLaneMask, {EntryIncrement, TC}, 13035f757f3fSDimitry Andric DL, "active.lane.mask.entry"); 13045f757f3fSDimitry Andric 13055f757f3fSDimitry Andric // Now create the ActiveLaneMaskPhi recipe in the main loop using the 13065f757f3fSDimitry Andric // preheader ActiveLaneMask instruction. 13075f757f3fSDimitry Andric auto LaneMaskPhi = new VPActiveLaneMaskPHIRecipe(EntryALM, DebugLoc()); 13085f757f3fSDimitry Andric LaneMaskPhi->insertAfter(CanonicalIVPHI); 13095f757f3fSDimitry Andric 13105f757f3fSDimitry Andric // Create the active lane mask for the next iteration of the loop before the 13115f757f3fSDimitry Andric // original terminator. 13125f757f3fSDimitry Andric VPRecipeBase *OriginalTerminator = EB->getTerminator(); 13135f757f3fSDimitry Andric Builder.setInsertPoint(OriginalTerminator); 13145f757f3fSDimitry Andric auto *InLoopIncrement = 13155f757f3fSDimitry Andric Builder.createOverflowingOp(VPInstruction::CanonicalIVIncrementForPart, 13165f757f3fSDimitry Andric {IncrementValue}, {false, false}, DL); 13175f757f3fSDimitry Andric auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask, 13185f757f3fSDimitry Andric {InLoopIncrement, TripCount}, DL, 13195f757f3fSDimitry Andric "active.lane.mask.next"); 13205f757f3fSDimitry Andric LaneMaskPhi->addOperand(ALM); 13215f757f3fSDimitry Andric 13225f757f3fSDimitry Andric // Replace the original terminator with BranchOnCond. We have to invert the 13235f757f3fSDimitry Andric // mask here because a true condition means jumping to the exit block. 13245f757f3fSDimitry Andric auto *NotMask = Builder.createNot(ALM, DL); 13255f757f3fSDimitry Andric Builder.createNaryOp(VPInstruction::BranchOnCond, {NotMask}, DL); 13265f757f3fSDimitry Andric OriginalTerminator->eraseFromParent(); 13275f757f3fSDimitry Andric return LaneMaskPhi; 13285f757f3fSDimitry Andric } 13295f757f3fSDimitry Andric 1330*0fca6ea1SDimitry Andric /// Collect all VPValues representing a header mask through the (ICMP_ULE, 1331*0fca6ea1SDimitry Andric /// WideCanonicalIV, backedge-taken-count) pattern. 1332*0fca6ea1SDimitry Andric /// TODO: Introduce explicit recipe for header-mask instead of searching 1333*0fca6ea1SDimitry Andric /// for the header-mask pattern manually. 
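/// In shorthand, a header mask produced by tail folding typically looks like
///   %mask = icmp ule %wide.canonical.iv, %backedge.taken.count
/// with the wide canonical IV (or an equivalent widened induction) as the
/// first operand of the compare.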
1334*0fca6ea1SDimitry Andric static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
1335*0fca6ea1SDimitry Andric   SmallVector<VPValue *> WideCanonicalIVs;
1336*0fca6ea1SDimitry Andric   auto *FoundWidenCanonicalIVUser =
1337*0fca6ea1SDimitry Andric       find_if(Plan.getCanonicalIV()->users(),
1338*0fca6ea1SDimitry Andric               [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
1339*0fca6ea1SDimitry Andric   assert(count_if(Plan.getCanonicalIV()->users(),
1340*0fca6ea1SDimitry Andric                   [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); }) <=
1341*0fca6ea1SDimitry Andric              1 &&
1342*0fca6ea1SDimitry Andric          "Must have at most one VPWidenCanonicalIVRecipe");
1343*0fca6ea1SDimitry Andric   if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) {
1344*0fca6ea1SDimitry Andric     auto *WideCanonicalIV =
1345*0fca6ea1SDimitry Andric         cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
1346*0fca6ea1SDimitry Andric     WideCanonicalIVs.push_back(WideCanonicalIV);
1347*0fca6ea1SDimitry Andric   }
1348*0fca6ea1SDimitry Andric 
1349*0fca6ea1SDimitry Andric   // Also include VPWidenIntOrFpInductionRecipes that represent a widened
1350*0fca6ea1SDimitry Andric   // version of the canonical induction.
1351*0fca6ea1SDimitry Andric   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
1352*0fca6ea1SDimitry Andric   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
1353*0fca6ea1SDimitry Andric     auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
1354*0fca6ea1SDimitry Andric     if (WidenOriginalIV && WidenOriginalIV->isCanonical())
1355*0fca6ea1SDimitry Andric       WideCanonicalIVs.push_back(WidenOriginalIV);
1356*0fca6ea1SDimitry Andric   }
1357*0fca6ea1SDimitry Andric 
1358*0fca6ea1SDimitry Andric   // Walk users of wide canonical IVs and collect all compares of the form
1359*0fca6ea1SDimitry Andric   // (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
1360*0fca6ea1SDimitry Andric   SmallVector<VPValue *> HeaderMasks;
1361*0fca6ea1SDimitry Andric   for (auto *Wide : WideCanonicalIVs) {
1362*0fca6ea1SDimitry Andric     for (VPUser *U : SmallVector<VPUser *>(Wide->users())) {
1363*0fca6ea1SDimitry Andric       auto *HeaderMask = dyn_cast<VPInstruction>(U);
1364*0fca6ea1SDimitry Andric       if (!HeaderMask || !vputils::isHeaderMask(HeaderMask, Plan))
1365*0fca6ea1SDimitry Andric         continue;
1366*0fca6ea1SDimitry Andric 
1367*0fca6ea1SDimitry Andric       assert(HeaderMask->getOperand(0) == Wide &&
1368*0fca6ea1SDimitry Andric              "WidenCanonicalIV must be the first operand of the compare");
1369*0fca6ea1SDimitry Andric       HeaderMasks.push_back(HeaderMask);
1370*0fca6ea1SDimitry Andric     }
1371*0fca6ea1SDimitry Andric   }
1372*0fca6ea1SDimitry Andric   return HeaderMasks;
1373*0fca6ea1SDimitry Andric }
1374*0fca6ea1SDimitry Andric 
13755f757f3fSDimitry Andric void VPlanTransforms::addActiveLaneMask(
13765f757f3fSDimitry Andric     VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
13775f757f3fSDimitry Andric     bool DataAndControlFlowWithoutRuntimeCheck) {
13785f757f3fSDimitry Andric   assert((!DataAndControlFlowWithoutRuntimeCheck ||
13795f757f3fSDimitry Andric           UseActiveLaneMaskForControlFlow) &&
13805f757f3fSDimitry Andric          "DataAndControlFlowWithoutRuntimeCheck implies "
13815f757f3fSDimitry Andric          "UseActiveLaneMaskForControlFlow");
13825f757f3fSDimitry Andric 
13835f757f3fSDimitry Andric   auto FoundWidenCanonicalIVUser =
13845f757f3fSDimitry Andric       find_if(Plan.getCanonicalIV()->users(),
13855f757f3fSDimitry Andric               [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
13865f757f3fSDimitry Andric   assert(FoundWidenCanonicalIVUser &&
13875f757f3fSDimitry Andric          "Must have widened canonical IV when tail folding!");
13885f757f3fSDimitry Andric   auto *WideCanonicalIV =
13895f757f3fSDimitry Andric       cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
13907a6dacacSDimitry Andric   VPSingleDefRecipe *LaneMask;
13915f757f3fSDimitry Andric   if (UseActiveLaneMaskForControlFlow) {
13925f757f3fSDimitry Andric     LaneMask = addVPLaneMaskPhiAndUpdateExitBranch(
13935f757f3fSDimitry Andric         Plan, DataAndControlFlowWithoutRuntimeCheck);
13945f757f3fSDimitry Andric   } else {
1395*0fca6ea1SDimitry Andric     VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
1396*0fca6ea1SDimitry Andric     LaneMask = B.createNaryOp(VPInstruction::ActiveLaneMask,
1397*0fca6ea1SDimitry Andric                               {WideCanonicalIV, Plan.getTripCount()}, nullptr,
1398*0fca6ea1SDimitry Andric                               "active.lane.mask");
13995f757f3fSDimitry Andric   }
14005f757f3fSDimitry Andric 
14015f757f3fSDimitry Andric   // Walk users of WideCanonicalIV and replace all compares of the form
14025f757f3fSDimitry Andric   // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
14035f757f3fSDimitry Andric   // active-lane-mask.
1404*0fca6ea1SDimitry Andric   for (VPValue *HeaderMask : collectAllHeaderMasks(Plan))
1405*0fca6ea1SDimitry Andric     HeaderMask->replaceAllUsesWith(LaneMask);
1406*0fca6ea1SDimitry Andric }
1407*0fca6ea1SDimitry Andric 
1408*0fca6ea1SDimitry Andric /// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
1409*0fca6ea1SDimitry Andric /// replace all uses of VPCanonicalIVPHIRecipe, except for the canonical IV
1410*0fca6ea1SDimitry Andric /// increment, with a VPEVLBasedIVPHIRecipe. After this transformation,
1411*0fca6ea1SDimitry Andric /// VPCanonicalIVPHIRecipe is used only for counting loop iterations.
1412*0fca6ea1SDimitry Andric ///
1413*0fca6ea1SDimitry Andric /// The function uses the following definitions:
1414*0fca6ea1SDimitry Andric /// %StartV is the canonical induction start value.
1415*0fca6ea1SDimitry Andric ///
1416*0fca6ea1SDimitry Andric /// The function adds the following recipes:
1417*0fca6ea1SDimitry Andric ///
1418*0fca6ea1SDimitry Andric /// vector.ph:
1419*0fca6ea1SDimitry Andric /// ...
1420*0fca6ea1SDimitry Andric ///
1421*0fca6ea1SDimitry Andric /// vector.body:
1422*0fca6ea1SDimitry Andric /// ...
1423*0fca6ea1SDimitry Andric /// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ],
1424*0fca6ea1SDimitry Andric ///                                               [ %NextEVLIV, %vector.body ]
1425*0fca6ea1SDimitry Andric /// %VPEVL = EXPLICIT-VECTOR-LENGTH %EVLPhi, original TC
1426*0fca6ea1SDimitry Andric /// ...
1427*0fca6ea1SDimitry Andric /// %NextEVLIV = add IVSize (cast i32 %VPEVL to IVSize), %EVLPhi
1428*0fca6ea1SDimitry Andric /// ...
1429*0fca6ea1SDimitry Andric ///
1430*0fca6ea1SDimitry Andric bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
1431*0fca6ea1SDimitry Andric   VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
1432*0fca6ea1SDimitry Andric   // The transform updates all users of inductions to work based on EVL, instead
1433*0fca6ea1SDimitry Andric   // of the VF directly. At the moment, widened inductions cannot be updated, so
1434*0fca6ea1SDimitry Andric   // bail out if the plan contains any.
1435*0fca6ea1SDimitry Andric   bool ContainsWidenInductions = any_of(Header->phis(), [](VPRecipeBase &Phi) {
1436*0fca6ea1SDimitry Andric     return isa<VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe>(
1437*0fca6ea1SDimitry Andric         &Phi);
1438*0fca6ea1SDimitry Andric   });
1439*0fca6ea1SDimitry Andric   // FIXME: Remove this once we can transform (select header_mask, true_value,
1440*0fca6ea1SDimitry Andric   // false_value) into vp.merge.
1441*0fca6ea1SDimitry Andric   bool ContainsOutloopReductions =
1442*0fca6ea1SDimitry Andric       any_of(Header->phis(), [&](VPRecipeBase &Phi) {
1443*0fca6ea1SDimitry Andric         auto *R = dyn_cast<VPReductionPHIRecipe>(&Phi);
1444*0fca6ea1SDimitry Andric         return R && !R->isInLoop();
1445*0fca6ea1SDimitry Andric       });
1446*0fca6ea1SDimitry Andric   if (ContainsWidenInductions || ContainsOutloopReductions)
1447*0fca6ea1SDimitry Andric     return false;
1448*0fca6ea1SDimitry Andric 
1449*0fca6ea1SDimitry Andric   auto *CanonicalIVPHI = Plan.getCanonicalIV();
1450*0fca6ea1SDimitry Andric   VPValue *StartV = CanonicalIVPHI->getStartValue();
1451*0fca6ea1SDimitry Andric 
1452*0fca6ea1SDimitry Andric   // Create the ExplicitVectorLengthPhi recipe in the main loop.
1453*0fca6ea1SDimitry Andric   auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc());
1454*0fca6ea1SDimitry Andric   EVLPhi->insertAfter(CanonicalIVPHI);
1455*0fca6ea1SDimitry Andric   auto *VPEVL = new VPInstruction(VPInstruction::ExplicitVectorLength,
1456*0fca6ea1SDimitry Andric                                   {EVLPhi, Plan.getTripCount()});
1457*0fca6ea1SDimitry Andric   VPEVL->insertBefore(*Header, Header->getFirstNonPhi());
1458*0fca6ea1SDimitry Andric 
1459*0fca6ea1SDimitry Andric   auto *CanonicalIVIncrement =
1460*0fca6ea1SDimitry Andric       cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
1461*0fca6ea1SDimitry Andric   VPSingleDefRecipe *OpVPEVL = VPEVL;
1462*0fca6ea1SDimitry Andric   if (unsigned IVSize = CanonicalIVPHI->getScalarType()->getScalarSizeInBits();
1463*0fca6ea1SDimitry Andric       IVSize != 32) {
1464*0fca6ea1SDimitry Andric     OpVPEVL = new VPScalarCastRecipe(IVSize < 32 ?
Instruction::Trunc 1465*0fca6ea1SDimitry Andric : Instruction::ZExt, 1466*0fca6ea1SDimitry Andric OpVPEVL, CanonicalIVPHI->getScalarType()); 1467*0fca6ea1SDimitry Andric OpVPEVL->insertBefore(CanonicalIVIncrement); 1468*0fca6ea1SDimitry Andric } 1469*0fca6ea1SDimitry Andric auto *NextEVLIV = 1470*0fca6ea1SDimitry Andric new VPInstruction(Instruction::Add, {OpVPEVL, EVLPhi}, 1471*0fca6ea1SDimitry Andric {CanonicalIVIncrement->hasNoUnsignedWrap(), 1472*0fca6ea1SDimitry Andric CanonicalIVIncrement->hasNoSignedWrap()}, 1473*0fca6ea1SDimitry Andric CanonicalIVIncrement->getDebugLoc(), "index.evl.next"); 1474*0fca6ea1SDimitry Andric NextEVLIV->insertBefore(CanonicalIVIncrement); 1475*0fca6ea1SDimitry Andric EVLPhi->addOperand(NextEVLIV); 1476*0fca6ea1SDimitry Andric 1477*0fca6ea1SDimitry Andric for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) { 1478*0fca6ea1SDimitry Andric for (VPUser *U : collectUsersRecursively(HeaderMask)) { 1479*0fca6ea1SDimitry Andric VPRecipeBase *NewRecipe = nullptr; 1480*0fca6ea1SDimitry Andric auto *CurRecipe = dyn_cast<VPRecipeBase>(U); 1481*0fca6ea1SDimitry Andric if (!CurRecipe) 14825f757f3fSDimitry Andric continue; 14835f757f3fSDimitry Andric 1484*0fca6ea1SDimitry Andric auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * { 1485*0fca6ea1SDimitry Andric assert(OrigMask && "Unmasked recipe when folding tail"); 1486*0fca6ea1SDimitry Andric return HeaderMask == OrigMask ? nullptr : OrigMask; 1487*0fca6ea1SDimitry Andric }; 1488*0fca6ea1SDimitry Andric if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(CurRecipe)) { 1489*0fca6ea1SDimitry Andric VPValue *NewMask = GetNewMask(MemR->getMask()); 1490*0fca6ea1SDimitry Andric if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR)) 1491*0fca6ea1SDimitry Andric NewRecipe = new VPWidenLoadEVLRecipe(L, VPEVL, NewMask); 1492*0fca6ea1SDimitry Andric else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR)) 1493*0fca6ea1SDimitry Andric NewRecipe = new VPWidenStoreEVLRecipe(S, VPEVL, NewMask); 1494*0fca6ea1SDimitry Andric else 1495*0fca6ea1SDimitry Andric llvm_unreachable("unsupported recipe"); 1496*0fca6ea1SDimitry Andric } else if (auto *RedR = dyn_cast<VPReductionRecipe>(CurRecipe)) { 1497*0fca6ea1SDimitry Andric NewRecipe = new VPReductionEVLRecipe(RedR, VPEVL, 1498*0fca6ea1SDimitry Andric GetNewMask(RedR->getCondOp())); 1499*0fca6ea1SDimitry Andric } 1500*0fca6ea1SDimitry Andric 1501*0fca6ea1SDimitry Andric if (NewRecipe) { 1502*0fca6ea1SDimitry Andric [[maybe_unused]] unsigned NumDefVal = NewRecipe->getNumDefinedValues(); 1503*0fca6ea1SDimitry Andric assert(NumDefVal == CurRecipe->getNumDefinedValues() && 1504*0fca6ea1SDimitry Andric "New recipe must define the same number of values as the " 1505*0fca6ea1SDimitry Andric "original."); 1506*0fca6ea1SDimitry Andric assert( 1507*0fca6ea1SDimitry Andric NumDefVal <= 1 && 1508*0fca6ea1SDimitry Andric "Only supports recipes with a single definition or without users."); 1509*0fca6ea1SDimitry Andric NewRecipe->insertBefore(CurRecipe); 1510*0fca6ea1SDimitry Andric if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe>(NewRecipe)) { 1511*0fca6ea1SDimitry Andric VPValue *CurVPV = CurRecipe->getVPSingleValue(); 1512*0fca6ea1SDimitry Andric CurVPV->replaceAllUsesWith(NewRecipe->getVPSingleValue()); 1513*0fca6ea1SDimitry Andric } 1514*0fca6ea1SDimitry Andric CurRecipe->eraseFromParent(); 1515*0fca6ea1SDimitry Andric } 1516*0fca6ea1SDimitry Andric } 1517*0fca6ea1SDimitry Andric recursivelyDeleteDeadRecipes(HeaderMask); 1518*0fca6ea1SDimitry Andric } 1519*0fca6ea1SDimitry Andric // Replace all uses of 
VPCanonicalIVPHIRecipe by 1520*0fca6ea1SDimitry Andric // VPEVLBasedIVPHIRecipe except for the canonical IV increment. 1521*0fca6ea1SDimitry Andric CanonicalIVPHI->replaceAllUsesWith(EVLPhi); 1522*0fca6ea1SDimitry Andric CanonicalIVIncrement->setOperand(0, CanonicalIVPHI); 1523*0fca6ea1SDimitry Andric // TODO: support unroll factor > 1. 1524*0fca6ea1SDimitry Andric Plan.setUF(1); 1525*0fca6ea1SDimitry Andric return true; 1526*0fca6ea1SDimitry Andric } 1527*0fca6ea1SDimitry Andric 1528*0fca6ea1SDimitry Andric void VPlanTransforms::dropPoisonGeneratingRecipes( 1529*0fca6ea1SDimitry Andric VPlan &Plan, function_ref<bool(BasicBlock *)> BlockNeedsPredication) { 1530*0fca6ea1SDimitry Andric // Collect recipes in the backward slice of `Root` that may generate a poison 1531*0fca6ea1SDimitry Andric // value that is used after vectorization. 1532*0fca6ea1SDimitry Andric SmallPtrSet<VPRecipeBase *, 16> Visited; 1533*0fca6ea1SDimitry Andric auto collectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) { 1534*0fca6ea1SDimitry Andric SmallVector<VPRecipeBase *, 16> Worklist; 1535*0fca6ea1SDimitry Andric Worklist.push_back(Root); 1536*0fca6ea1SDimitry Andric 1537*0fca6ea1SDimitry Andric // Traverse the backward slice of Root through its use-def chain. 1538*0fca6ea1SDimitry Andric while (!Worklist.empty()) { 1539*0fca6ea1SDimitry Andric VPRecipeBase *CurRec = Worklist.back(); 1540*0fca6ea1SDimitry Andric Worklist.pop_back(); 1541*0fca6ea1SDimitry Andric 1542*0fca6ea1SDimitry Andric if (!Visited.insert(CurRec).second) 1543*0fca6ea1SDimitry Andric continue; 1544*0fca6ea1SDimitry Andric 1545*0fca6ea1SDimitry Andric // Prune search if we find another recipe generating a widen memory 1546*0fca6ea1SDimitry Andric // instruction. Widen memory instructions involved in address computation 1547*0fca6ea1SDimitry Andric // will lead to gather/scatter instructions, which don't need to be 1548*0fca6ea1SDimitry Andric // handled. 1549*0fca6ea1SDimitry Andric if (isa<VPWidenMemoryRecipe>(CurRec) || isa<VPInterleaveRecipe>(CurRec) || 1550*0fca6ea1SDimitry Andric isa<VPScalarIVStepsRecipe>(CurRec) || isa<VPHeaderPHIRecipe>(CurRec)) 1551*0fca6ea1SDimitry Andric continue; 1552*0fca6ea1SDimitry Andric 1553*0fca6ea1SDimitry Andric // This recipe contributes to the address computation of a widen 1554*0fca6ea1SDimitry Andric // load/store. If the underlying instruction has poison-generating flags, 1555*0fca6ea1SDimitry Andric // drop them directly. 1556*0fca6ea1SDimitry Andric if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(CurRec)) { 1557*0fca6ea1SDimitry Andric VPValue *A, *B; 1558*0fca6ea1SDimitry Andric using namespace llvm::VPlanPatternMatch; 1559*0fca6ea1SDimitry Andric // Dropping disjoint from an OR may yield incorrect results, as some 1560*0fca6ea1SDimitry Andric // analysis may have converted it to an Add implicitly (e.g. SCEV used 1561*0fca6ea1SDimitry Andric // for dependence analysis). Instead, replace it with an equivalent Add. 1562*0fca6ea1SDimitry Andric // This is possible as all users of the disjoint OR only access lanes 1563*0fca6ea1SDimitry Andric // where the operands are disjoint or poison otherwise. 
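        // E.g. (shorthand) an address computed as "or disjoint %base, %off"
        // becomes "add %base, %off", which yields the same result for all
        // lanes that are actually accessed.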
1564*0fca6ea1SDimitry Andric if (match(RecWithFlags, m_BinaryOr(m_VPValue(A), m_VPValue(B))) && 1565*0fca6ea1SDimitry Andric RecWithFlags->isDisjoint()) { 1566*0fca6ea1SDimitry Andric VPBuilder Builder(RecWithFlags); 1567*0fca6ea1SDimitry Andric VPInstruction *New = Builder.createOverflowingOp( 1568*0fca6ea1SDimitry Andric Instruction::Add, {A, B}, {false, false}, 1569*0fca6ea1SDimitry Andric RecWithFlags->getDebugLoc()); 1570*0fca6ea1SDimitry Andric New->setUnderlyingValue(RecWithFlags->getUnderlyingValue()); 1571*0fca6ea1SDimitry Andric RecWithFlags->replaceAllUsesWith(New); 1572*0fca6ea1SDimitry Andric RecWithFlags->eraseFromParent(); 1573*0fca6ea1SDimitry Andric CurRec = New; 1574*0fca6ea1SDimitry Andric } else 1575*0fca6ea1SDimitry Andric RecWithFlags->dropPoisonGeneratingFlags(); 1576*0fca6ea1SDimitry Andric } else { 1577*0fca6ea1SDimitry Andric Instruction *Instr = dyn_cast_or_null<Instruction>( 1578*0fca6ea1SDimitry Andric CurRec->getVPSingleValue()->getUnderlyingValue()); 1579*0fca6ea1SDimitry Andric (void)Instr; 1580*0fca6ea1SDimitry Andric assert((!Instr || !Instr->hasPoisonGeneratingFlags()) && 1581*0fca6ea1SDimitry Andric "found instruction with poison generating flags not covered by " 1582*0fca6ea1SDimitry Andric "VPRecipeWithIRFlags"); 1583*0fca6ea1SDimitry Andric } 1584*0fca6ea1SDimitry Andric 1585*0fca6ea1SDimitry Andric // Add new definitions to the worklist. 1586*0fca6ea1SDimitry Andric for (VPValue *operand : CurRec->operands()) 1587*0fca6ea1SDimitry Andric if (VPRecipeBase *OpDef = operand->getDefiningRecipe()) 1588*0fca6ea1SDimitry Andric Worklist.push_back(OpDef); 1589*0fca6ea1SDimitry Andric } 1590*0fca6ea1SDimitry Andric }); 1591*0fca6ea1SDimitry Andric 1592*0fca6ea1SDimitry Andric // Traverse all the recipes in the VPlan and collect the poison-generating 1593*0fca6ea1SDimitry Andric // recipes in the backward slice starting at the address of a VPWidenRecipe or 1594*0fca6ea1SDimitry Andric // VPInterleaveRecipe. 1595*0fca6ea1SDimitry Andric auto Iter = vp_depth_first_deep(Plan.getEntry()); 1596*0fca6ea1SDimitry Andric for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) { 1597*0fca6ea1SDimitry Andric for (VPRecipeBase &Recipe : *VPBB) { 1598*0fca6ea1SDimitry Andric if (auto *WidenRec = dyn_cast<VPWidenMemoryRecipe>(&Recipe)) { 1599*0fca6ea1SDimitry Andric Instruction &UnderlyingInstr = WidenRec->getIngredient(); 1600*0fca6ea1SDimitry Andric VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe(); 1601*0fca6ea1SDimitry Andric if (AddrDef && WidenRec->isConsecutive() && 1602*0fca6ea1SDimitry Andric BlockNeedsPredication(UnderlyingInstr.getParent())) 1603*0fca6ea1SDimitry Andric collectPoisonGeneratingInstrsInBackwardSlice(AddrDef); 1604*0fca6ea1SDimitry Andric } else if (auto *InterleaveRec = dyn_cast<VPInterleaveRecipe>(&Recipe)) { 1605*0fca6ea1SDimitry Andric VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe(); 1606*0fca6ea1SDimitry Andric if (AddrDef) { 1607*0fca6ea1SDimitry Andric // Check if any member of the interleave group needs predication. 
1608*0fca6ea1SDimitry Andric const InterleaveGroup<Instruction> *InterGroup = 1609*0fca6ea1SDimitry Andric InterleaveRec->getInterleaveGroup(); 1610*0fca6ea1SDimitry Andric bool NeedPredication = false; 1611*0fca6ea1SDimitry Andric for (int I = 0, NumMembers = InterGroup->getNumMembers(); 1612*0fca6ea1SDimitry Andric I < NumMembers; ++I) { 1613*0fca6ea1SDimitry Andric Instruction *Member = InterGroup->getMember(I); 1614*0fca6ea1SDimitry Andric if (Member) 1615*0fca6ea1SDimitry Andric NeedPredication |= BlockNeedsPredication(Member->getParent()); 1616*0fca6ea1SDimitry Andric } 1617*0fca6ea1SDimitry Andric 1618*0fca6ea1SDimitry Andric if (NeedPredication) 1619*0fca6ea1SDimitry Andric collectPoisonGeneratingInstrsInBackwardSlice(AddrDef); 1620*0fca6ea1SDimitry Andric } 1621*0fca6ea1SDimitry Andric } 1622*0fca6ea1SDimitry Andric } 16235f757f3fSDimitry Andric } 16245f757f3fSDimitry Andric } 1625