1 //===-- VPlanTransforms.cpp - Utility VPlan to VPlan transforms -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements a set of utility VPlan to VPlan transformations. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "VPlanTransforms.h" 15 #include "llvm/ADT/PostOrderIterator.h" 16 17 using namespace llvm; 18 19 void VPlanTransforms::VPInstructionsToVPRecipes( 20 Loop *OrigLoop, VPlanPtr &Plan, 21 function_ref<const InductionDescriptor *(PHINode *)> 22 GetIntOrFpInductionDescriptor, 23 SmallPtrSetImpl<Instruction *> &DeadInstructions, ScalarEvolution &SE) { 24 25 auto *TopRegion = cast<VPRegionBlock>(Plan->getEntry()); 26 ReversePostOrderTraversal<VPBlockBase *> RPOT(TopRegion->getEntry()); 27 28 for (VPBlockBase *Base : RPOT) { 29 // Do not widen instructions in pre-header and exit blocks. 30 if (Base->getNumPredecessors() == 0 || Base->getNumSuccessors() == 0) 31 continue; 32 33 VPBasicBlock *VPBB = Base->getEntryBasicBlock(); 34 // Introduce each ingredient into VPlan. 35 for (VPRecipeBase &Ingredient : llvm::make_early_inc_range(*VPBB)) { 36 VPValue *VPV = Ingredient.getVPSingleValue(); 37 Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue()); 38 if (DeadInstructions.count(Inst)) { 39 VPValue DummyValue; 40 VPV->replaceAllUsesWith(&DummyValue); 41 Ingredient.eraseFromParent(); 42 continue; 43 } 44 45 VPRecipeBase *NewRecipe = nullptr; 46 if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) { 47 auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue()); 48 if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) { 49 VPValue *Start = Plan->getOrAddVPValue(II->getStartValue()); 50 NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, *II); 51 } else { 52 Plan->addVPValue(Phi, VPPhi); 53 continue; 54 } 55 } else { 56 assert(isa<VPInstruction>(&Ingredient) && 57 "only VPInstructions expected here"); 58 assert(!isa<PHINode>(Inst) && "phis should be handled above"); 59 // Create VPWidenMemoryInstructionRecipe for loads and stores. 60 if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 61 NewRecipe = new VPWidenMemoryInstructionRecipe( 62 *Load, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)), 63 nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/); 64 } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 65 NewRecipe = new VPWidenMemoryInstructionRecipe( 66 *Store, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)), 67 Plan->getOrAddVPValue(Store->getValueOperand()), nullptr /*Mask*/, 68 false /*Consecutive*/, false /*Reverse*/); 69 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { 70 NewRecipe = new VPWidenGEPRecipe( 71 GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop); 72 } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) { 73 NewRecipe = 74 new VPWidenCallRecipe(*CI, Plan->mapToVPValues(CI->args())); 75 } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) { 76 bool InvariantCond = 77 SE.isLoopInvariant(SE.getSCEV(SI->getOperand(0)), OrigLoop); 78 NewRecipe = new VPWidenSelectRecipe( 79 *SI, Plan->mapToVPValues(SI->operands()), InvariantCond); 80 } else { 81 NewRecipe = 82 new VPWidenRecipe(*Inst, Plan->mapToVPValues(Inst->operands())); 83 } 84 } 85 86 NewRecipe->insertBefore(&Ingredient); 87 if (NewRecipe->getNumDefinedValues() == 1) 88 VPV->replaceAllUsesWith(NewRecipe->getVPSingleValue()); 89 else 90 assert(NewRecipe->getNumDefinedValues() == 0 && 91 "Only recpies with zero or one defined values expected"); 92 Ingredient.eraseFromParent(); 93 Plan->removeVPValueFor(Inst); 94 for (auto *Def : NewRecipe->definedValues()) { 95 Plan->addVPValue(Inst, Def); 96 } 97 } 98 } 99 } 100 101 bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) { 102 auto Iter = depth_first( 103 VPBlockRecursiveTraversalWrapper<VPBlockBase *>(Plan.getEntry())); 104 bool Changed = false; 105 // First, collect the operands of all predicated replicate recipes as seeds 106 // for sinking. 107 SetVector<std::pair<VPBasicBlock *, VPValue *>> WorkList; 108 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) { 109 for (auto &Recipe : *VPBB) { 110 auto *RepR = dyn_cast<VPReplicateRecipe>(&Recipe); 111 if (!RepR || !RepR->isPredicated()) 112 continue; 113 for (VPValue *Op : RepR->operands()) 114 WorkList.insert(std::make_pair(RepR->getParent(), Op)); 115 } 116 } 117 118 // Try to sink each replicate recipe in the worklist. 119 while (!WorkList.empty()) { 120 VPBasicBlock *SinkTo; 121 VPValue *C; 122 std::tie(SinkTo, C) = WorkList.pop_back_val(); 123 auto *SinkCandidate = dyn_cast_or_null<VPReplicateRecipe>(C->Def); 124 if (!SinkCandidate || SinkCandidate->isUniform() || 125 SinkCandidate->getParent() == SinkTo || 126 SinkCandidate->mayHaveSideEffects() || 127 SinkCandidate->mayReadOrWriteMemory()) 128 continue; 129 130 bool NeedsDuplicating = false; 131 // All recipe users of the sink candidate must be in the same block SinkTo 132 // or all users outside of SinkTo must be uniform-after-vectorization ( 133 // i.e., only first lane is used) . In the latter case, we need to duplicate 134 // SinkCandidate. At the moment, we identify such UAV's by looking for the 135 // address operands of widened memory recipes. 136 auto CanSinkWithUser = [SinkTo, &NeedsDuplicating, 137 SinkCandidate](VPUser *U) { 138 auto *UI = dyn_cast<VPRecipeBase>(U); 139 if (!UI) 140 return false; 141 if (UI->getParent() == SinkTo) 142 return true; 143 auto *WidenI = dyn_cast<VPWidenMemoryInstructionRecipe>(UI); 144 if (WidenI && WidenI->getAddr() == SinkCandidate) { 145 NeedsDuplicating = true; 146 return true; 147 } 148 return false; 149 }; 150 if (!all_of(SinkCandidate->users(), CanSinkWithUser)) 151 continue; 152 153 if (NeedsDuplicating) { 154 Instruction *I = cast<Instruction>(SinkCandidate->getUnderlyingValue()); 155 auto *Clone = 156 new VPReplicateRecipe(I, SinkCandidate->operands(), true, false); 157 // TODO: add ".cloned" suffix to name of Clone's VPValue. 158 159 Clone->insertBefore(SinkCandidate); 160 SmallVector<VPUser *, 4> Users(SinkCandidate->users()); 161 for (auto *U : Users) { 162 auto *UI = cast<VPRecipeBase>(U); 163 if (UI->getParent() == SinkTo) 164 continue; 165 166 for (unsigned Idx = 0; Idx != UI->getNumOperands(); Idx++) { 167 if (UI->getOperand(Idx) != SinkCandidate) 168 continue; 169 UI->setOperand(Idx, Clone); 170 } 171 } 172 } 173 SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi()); 174 for (VPValue *Op : SinkCandidate->operands()) 175 WorkList.insert(std::make_pair(SinkTo, Op)); 176 Changed = true; 177 } 178 return Changed; 179 } 180 181 /// If \p R is a region with a VPBranchOnMaskRecipe in the entry block, return 182 /// the mask. 183 VPValue *getPredicatedMask(VPRegionBlock *R) { 184 auto *EntryBB = dyn_cast<VPBasicBlock>(R->getEntry()); 185 if (!EntryBB || EntryBB->size() != 1 || 186 !isa<VPBranchOnMaskRecipe>(EntryBB->begin())) 187 return nullptr; 188 189 return cast<VPBranchOnMaskRecipe>(&*EntryBB->begin())->getOperand(0); 190 } 191 192 /// If \p R is a triangle region, return the 'then' block of the triangle. 193 static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) { 194 auto *EntryBB = cast<VPBasicBlock>(R->getEntry()); 195 if (EntryBB->getNumSuccessors() != 2) 196 return nullptr; 197 198 auto *Succ0 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[0]); 199 auto *Succ1 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[1]); 200 if (!Succ0 || !Succ1) 201 return nullptr; 202 203 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1) 204 return nullptr; 205 if (Succ0->getSingleSuccessor() == Succ1) 206 return Succ0; 207 if (Succ1->getSingleSuccessor() == Succ0) 208 return Succ1; 209 return nullptr; 210 } 211 212 bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) { 213 SetVector<VPRegionBlock *> DeletedRegions; 214 bool Changed = false; 215 216 // Collect region blocks to process up-front, to avoid iterator invalidation 217 // issues while merging regions. 218 SmallVector<VPRegionBlock *, 8> CandidateRegions( 219 VPBlockUtils::blocksOnly<VPRegionBlock>(depth_first( 220 VPBlockRecursiveTraversalWrapper<VPBlockBase *>(Plan.getEntry())))); 221 222 // Check if Base is a predicated triangle, followed by an empty block, 223 // followed by another predicate triangle. If that's the case, move the 224 // recipes from the first to the second triangle. 225 for (VPRegionBlock *Region1 : CandidateRegions) { 226 if (DeletedRegions.contains(Region1)) 227 continue; 228 auto *MiddleBasicBlock = 229 dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor()); 230 if (!MiddleBasicBlock || !MiddleBasicBlock->empty()) 231 continue; 232 233 auto *Region2 = 234 dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor()); 235 if (!Region2) 236 continue; 237 238 VPValue *Mask1 = getPredicatedMask(Region1); 239 VPValue *Mask2 = getPredicatedMask(Region2); 240 if (!Mask1 || Mask1 != Mask2) 241 continue; 242 VPBasicBlock *Then1 = getPredicatedThenBlock(Region1); 243 VPBasicBlock *Then2 = getPredicatedThenBlock(Region2); 244 if (!Then1 || !Then2) 245 continue; 246 247 assert(Mask1 && Mask2 && "both region must have conditions"); 248 249 // Note: No fusion-preventing memory dependencies are expected in either 250 // region. Such dependencies should be rejected during earlier dependence 251 // checks, which guarantee accesses can be re-ordered for vectorization. 252 // 253 // Move recipes to the successor region. 254 for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1))) 255 ToMove.moveBefore(*Then2, Then2->getFirstNonPhi()); 256 257 auto *Merge1 = cast<VPBasicBlock>(Then1->getSingleSuccessor()); 258 auto *Merge2 = cast<VPBasicBlock>(Then2->getSingleSuccessor()); 259 260 // Move VPPredInstPHIRecipes from the merge block to the successor region's 261 // merge block. Update all users inside the successor region to use the 262 // original values. 263 for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) { 264 VPValue *PredInst1 = 265 cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0); 266 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue(); 267 SmallVector<VPUser *> Users(Phi1ToMoveV->users()); 268 for (VPUser *U : Users) { 269 auto *UI = dyn_cast<VPRecipeBase>(U); 270 if (!UI || UI->getParent() != Then2) 271 continue; 272 for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) { 273 if (Phi1ToMoveV != U->getOperand(I)) 274 continue; 275 U->setOperand(I, PredInst1); 276 } 277 } 278 279 Phi1ToMove.moveBefore(*Merge2, Merge2->begin()); 280 } 281 282 // Finally, remove the first region. 283 for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) { 284 VPBlockUtils::disconnectBlocks(Pred, Region1); 285 VPBlockUtils::connectBlocks(Pred, MiddleBasicBlock); 286 } 287 VPBlockUtils::disconnectBlocks(Region1, MiddleBasicBlock); 288 DeletedRegions.insert(Region1); 289 } 290 291 for (VPRegionBlock *ToDelete : DeletedRegions) 292 delete ToDelete; 293 return Changed; 294 } 295 296 void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) { 297 SmallVector<std::pair<VPRecipeBase *, VPValue *>> CastsToRemove; 298 for (auto &Phi : Plan.getEntry()->getEntryBasicBlock()->phis()) { 299 auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); 300 if (!IV || IV->getTruncInst()) 301 continue; 302 303 // Visit all casts connected to IV and in Casts. Collect them. 304 // remember them for removal. 305 auto &Casts = IV->getInductionDescriptor().getCastInsts(); 306 VPValue *FindMyCast = IV; 307 for (Instruction *IRCast : reverse(Casts)) { 308 VPRecipeBase *FoundUserCast = nullptr; 309 for (auto *U : FindMyCast->users()) { 310 auto *UserCast = cast<VPRecipeBase>(U); 311 if (UserCast->getNumDefinedValues() == 1 && 312 UserCast->getVPSingleValue()->getUnderlyingValue() == IRCast) { 313 FoundUserCast = UserCast; 314 break; 315 } 316 } 317 assert(FoundUserCast && "Missing a cast to remove"); 318 CastsToRemove.emplace_back(FoundUserCast, IV); 319 FindMyCast = FoundUserCast->getVPSingleValue(); 320 } 321 } 322 for (auto &E : CastsToRemove) { 323 E.first->getVPSingleValue()->replaceAllUsesWith(E.second); 324 E.first->eraseFromParent(); 325 } 326 } 327