xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1480093f4SDimitry Andric //===-- VPlanTransforms.cpp - Utility VPlan to VPlan transforms -----------===//
2480093f4SDimitry Andric //
3480093f4SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4480093f4SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5480093f4SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6480093f4SDimitry Andric //
7480093f4SDimitry Andric //===----------------------------------------------------------------------===//
8480093f4SDimitry Andric ///
9480093f4SDimitry Andric /// \file
10480093f4SDimitry Andric /// This file implements a set of utility VPlan to VPlan transformations.
11480093f4SDimitry Andric ///
12480093f4SDimitry Andric //===----------------------------------------------------------------------===//
13480093f4SDimitry Andric 
14480093f4SDimitry Andric #include "VPlanTransforms.h"
1506c3fb27SDimitry Andric #include "VPRecipeBuilder.h"
165f757f3fSDimitry Andric #include "VPlanAnalysis.h"
17bdd1243dSDimitry Andric #include "VPlanCFG.h"
185f757f3fSDimitry Andric #include "VPlanDominatorTree.h"
19*0fca6ea1SDimitry Andric #include "VPlanPatternMatch.h"
20480093f4SDimitry Andric #include "llvm/ADT/PostOrderIterator.h"
215f757f3fSDimitry Andric #include "llvm/ADT/STLExtras.h"
2281ad6265SDimitry Andric #include "llvm/ADT/SetVector.h"
2381ad6265SDimitry Andric #include "llvm/Analysis/IVDescriptors.h"
24bdd1243dSDimitry Andric #include "llvm/Analysis/VectorUtils.h"
25bdd1243dSDimitry Andric #include "llvm/IR/Intrinsics.h"
265f757f3fSDimitry Andric #include "llvm/IR/PatternMatch.h"
27480093f4SDimitry Andric 
28480093f4SDimitry Andric using namespace llvm;
29480093f4SDimitry Andric 
30480093f4SDimitry Andric void VPlanTransforms::VPInstructionsToVPRecipes(
3106c3fb27SDimitry Andric     VPlanPtr &Plan,
320eae32dcSDimitry Andric     function_ref<const InductionDescriptor *(PHINode *)>
330eae32dcSDimitry Andric         GetIntOrFpInductionDescriptor,
3406c3fb27SDimitry Andric     ScalarEvolution &SE, const TargetLibraryInfo &TLI) {
35480093f4SDimitry Andric 
36bdd1243dSDimitry Andric   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
37*0fca6ea1SDimitry Andric       Plan->getVectorLoopRegion());
3881ad6265SDimitry Andric   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
39*0fca6ea1SDimitry Andric     // Skip blocks outside region
40*0fca6ea1SDimitry Andric     if (!VPBB->getParent())
41*0fca6ea1SDimitry Andric       break;
4281ad6265SDimitry Andric     VPRecipeBase *Term = VPBB->getTerminator();
4381ad6265SDimitry Andric     auto EndIter = Term ? Term->getIterator() : VPBB->end();
44480093f4SDimitry Andric     // Introduce each ingredient into VPlan.
4581ad6265SDimitry Andric     for (VPRecipeBase &Ingredient :
4681ad6265SDimitry Andric          make_early_inc_range(make_range(VPBB->begin(), EndIter))) {
4781ad6265SDimitry Andric 
48349cc55cSDimitry Andric       VPValue *VPV = Ingredient.getVPSingleValue();
49fe6060f1SDimitry Andric       Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
50480093f4SDimitry Andric 
51480093f4SDimitry Andric       VPRecipeBase *NewRecipe = nullptr;
52349cc55cSDimitry Andric       if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) {
53fe6060f1SDimitry Andric         auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
54*0fca6ea1SDimitry Andric         const auto *II = GetIntOrFpInductionDescriptor(Phi);
55*0fca6ea1SDimitry Andric         if (!II)
56*0fca6ea1SDimitry Andric           continue;
57*0fca6ea1SDimitry Andric 
58*0fca6ea1SDimitry Andric         VPValue *Start = Plan->getOrAddLiveIn(II->getStartValue());
5981ad6265SDimitry Andric         VPValue *Step =
6081ad6265SDimitry Andric             vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE);
6106c3fb27SDimitry Andric         NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II);
62fe6060f1SDimitry Andric       } else {
63349cc55cSDimitry Andric         assert(isa<VPInstruction>(&Ingredient) &&
64fe6060f1SDimitry Andric                "only VPInstructions expected here");
65fe6060f1SDimitry Andric         assert(!isa<PHINode>(Inst) && "phis should be handled above");
66*0fca6ea1SDimitry Andric         // Create VPWidenMemoryRecipe for loads and stores.
67fe6060f1SDimitry Andric         if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
68*0fca6ea1SDimitry Andric           NewRecipe = new VPWidenLoadRecipe(
6906c3fb27SDimitry Andric               *Load, Ingredient.getOperand(0), nullptr /*Mask*/,
70*0fca6ea1SDimitry Andric               false /*Consecutive*/, false /*Reverse*/,
71*0fca6ea1SDimitry Andric               Ingredient.getDebugLoc());
72fe6060f1SDimitry Andric         } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
73*0fca6ea1SDimitry Andric           NewRecipe = new VPWidenStoreRecipe(
7406c3fb27SDimitry Andric               *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
75*0fca6ea1SDimitry Andric               nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
76*0fca6ea1SDimitry Andric               Ingredient.getDebugLoc());
77480093f4SDimitry Andric         } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
7806c3fb27SDimitry Andric           NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
79fe6060f1SDimitry Andric         } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
807a6dacacSDimitry Andric           NewRecipe = new VPWidenCallRecipe(
81*0fca6ea1SDimitry Andric               CI, Ingredient.operands(), getVectorIntrinsicIDForCall(CI, &TLI),
82*0fca6ea1SDimitry Andric               CI->getDebugLoc());
83fe6060f1SDimitry Andric         } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
8406c3fb27SDimitry Andric           NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands());
8506c3fb27SDimitry Andric         } else if (auto *CI = dyn_cast<CastInst>(Inst)) {
8606c3fb27SDimitry Andric           NewRecipe = new VPWidenCastRecipe(
875f757f3fSDimitry Andric               CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), *CI);
88fe6060f1SDimitry Andric         } else {
8906c3fb27SDimitry Andric           NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands());
90fe6060f1SDimitry Andric         }
91fe6060f1SDimitry Andric       }
92480093f4SDimitry Andric 
93349cc55cSDimitry Andric       NewRecipe->insertBefore(&Ingredient);
94e8d8bef9SDimitry Andric       if (NewRecipe->getNumDefinedValues() == 1)
95fe6060f1SDimitry Andric         VPV->replaceAllUsesWith(NewRecipe->getVPSingleValue());
96e8d8bef9SDimitry Andric       else
97e8d8bef9SDimitry Andric         assert(NewRecipe->getNumDefinedValues() == 0 &&
98e8d8bef9SDimitry Andric                "Only recpies with zero or one defined values expected");
99349cc55cSDimitry Andric       Ingredient.eraseFromParent();
100480093f4SDimitry Andric     }
101480093f4SDimitry Andric   }
102fe6060f1SDimitry Andric }
103fe6060f1SDimitry Andric 
10406c3fb27SDimitry Andric static bool sinkScalarOperands(VPlan &Plan) {
105bdd1243dSDimitry Andric   auto Iter = vp_depth_first_deep(Plan.getEntry());
106fe6060f1SDimitry Andric   bool Changed = false;
107bdd1243dSDimitry Andric   // First, collect the operands of all recipes in replicate blocks as seeds for
108bdd1243dSDimitry Andric   // sinking.
1097a6dacacSDimitry Andric   SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList;
110bdd1243dSDimitry Andric   for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) {
111bdd1243dSDimitry Andric     VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock();
112bdd1243dSDimitry Andric     if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)
113fe6060f1SDimitry Andric       continue;
114bdd1243dSDimitry Andric     VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]);
115bdd1243dSDimitry Andric     if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
116bdd1243dSDimitry Andric       continue;
117bdd1243dSDimitry Andric     for (auto &Recipe : *VPBB) {
118bdd1243dSDimitry Andric       for (VPValue *Op : Recipe.operands())
1197a6dacacSDimitry Andric         if (auto *Def =
1207a6dacacSDimitry Andric                 dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
121bdd1243dSDimitry Andric           WorkList.insert(std::make_pair(VPBB, Def));
122fe6060f1SDimitry Andric     }
123fe6060f1SDimitry Andric   }
124fe6060f1SDimitry Andric 
125bdd1243dSDimitry Andric   bool ScalarVFOnly = Plan.hasScalarVFOnly();
126bdd1243dSDimitry Andric   // Try to sink each replicate or scalar IV steps recipe in the worklist.
127bdd1243dSDimitry Andric   for (unsigned I = 0; I != WorkList.size(); ++I) {
128349cc55cSDimitry Andric     VPBasicBlock *SinkTo;
1297a6dacacSDimitry Andric     VPSingleDefRecipe *SinkCandidate;
130bdd1243dSDimitry Andric     std::tie(SinkTo, SinkCandidate) = WorkList[I];
131bdd1243dSDimitry Andric     if (SinkCandidate->getParent() == SinkTo ||
132fe6060f1SDimitry Andric         SinkCandidate->mayHaveSideEffects() ||
133fe6060f1SDimitry Andric         SinkCandidate->mayReadOrWriteMemory())
134fe6060f1SDimitry Andric       continue;
135bdd1243dSDimitry Andric     if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
136bdd1243dSDimitry Andric       if (!ScalarVFOnly && RepR->isUniform())
137bdd1243dSDimitry Andric         continue;
138bdd1243dSDimitry Andric     } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate))
139bdd1243dSDimitry Andric       continue;
140fe6060f1SDimitry Andric 
141349cc55cSDimitry Andric     bool NeedsDuplicating = false;
142349cc55cSDimitry Andric     // All recipe users of the sink candidate must be in the same block SinkTo
143349cc55cSDimitry Andric     // or all users outside of SinkTo must be uniform-after-vectorization (
144349cc55cSDimitry Andric     // i.e., only first lane is used) . In the latter case, we need to duplicate
145bdd1243dSDimitry Andric     // SinkCandidate.
146349cc55cSDimitry Andric     auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
147349cc55cSDimitry Andric                             SinkCandidate](VPUser *U) {
148fe6060f1SDimitry Andric       auto *UI = dyn_cast<VPRecipeBase>(U);
149349cc55cSDimitry Andric       if (!UI)
150349cc55cSDimitry Andric         return false;
151349cc55cSDimitry Andric       if (UI->getParent() == SinkTo)
152349cc55cSDimitry Andric         return true;
1537a6dacacSDimitry Andric       NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
154bdd1243dSDimitry Andric       // We only know how to duplicate VPRecipeRecipes for now.
155bdd1243dSDimitry Andric       return NeedsDuplicating && isa<VPReplicateRecipe>(SinkCandidate);
156349cc55cSDimitry Andric     };
1577a6dacacSDimitry Andric     if (!all_of(SinkCandidate->users(), CanSinkWithUser))
158fe6060f1SDimitry Andric       continue;
159fe6060f1SDimitry Andric 
160349cc55cSDimitry Andric     if (NeedsDuplicating) {
161bdd1243dSDimitry Andric       if (ScalarVFOnly)
162bdd1243dSDimitry Andric         continue;
163*0fca6ea1SDimitry Andric       Instruction *I = SinkCandidate->getUnderlyingInstr();
16406c3fb27SDimitry Andric       auto *Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true);
165349cc55cSDimitry Andric       // TODO: add ".cloned" suffix to name of Clone's VPValue.
166349cc55cSDimitry Andric 
167349cc55cSDimitry Andric       Clone->insertBefore(SinkCandidate);
1687a6dacacSDimitry Andric       SinkCandidate->replaceUsesWithIf(Clone, [SinkTo](VPUser &U, unsigned) {
1695f757f3fSDimitry Andric         return cast<VPRecipeBase>(&U)->getParent() != SinkTo;
1705f757f3fSDimitry Andric       });
171349cc55cSDimitry Andric     }
172fe6060f1SDimitry Andric     SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi());
173349cc55cSDimitry Andric     for (VPValue *Op : SinkCandidate->operands())
1747a6dacacSDimitry Andric       if (auto *Def =
1757a6dacacSDimitry Andric               dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
176bdd1243dSDimitry Andric         WorkList.insert(std::make_pair(SinkTo, Def));
177fe6060f1SDimitry Andric     Changed = true;
178fe6060f1SDimitry Andric   }
179fe6060f1SDimitry Andric   return Changed;
180fe6060f1SDimitry Andric }
181fe6060f1SDimitry Andric 
182fe6060f1SDimitry Andric /// If \p R is a region with a VPBranchOnMaskRecipe in the entry block, return
183fe6060f1SDimitry Andric /// the mask.
184fe6060f1SDimitry Andric VPValue *getPredicatedMask(VPRegionBlock *R) {
185fe6060f1SDimitry Andric   auto *EntryBB = dyn_cast<VPBasicBlock>(R->getEntry());
186fe6060f1SDimitry Andric   if (!EntryBB || EntryBB->size() != 1 ||
187fe6060f1SDimitry Andric       !isa<VPBranchOnMaskRecipe>(EntryBB->begin()))
188fe6060f1SDimitry Andric     return nullptr;
189fe6060f1SDimitry Andric 
190fe6060f1SDimitry Andric   return cast<VPBranchOnMaskRecipe>(&*EntryBB->begin())->getOperand(0);
191fe6060f1SDimitry Andric }
192fe6060f1SDimitry Andric 
193fe6060f1SDimitry Andric /// If \p R is a triangle region, return the 'then' block of the triangle.
194fe6060f1SDimitry Andric static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) {
195fe6060f1SDimitry Andric   auto *EntryBB = cast<VPBasicBlock>(R->getEntry());
196fe6060f1SDimitry Andric   if (EntryBB->getNumSuccessors() != 2)
197fe6060f1SDimitry Andric     return nullptr;
198fe6060f1SDimitry Andric 
199fe6060f1SDimitry Andric   auto *Succ0 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[0]);
200fe6060f1SDimitry Andric   auto *Succ1 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[1]);
201fe6060f1SDimitry Andric   if (!Succ0 || !Succ1)
202fe6060f1SDimitry Andric     return nullptr;
203fe6060f1SDimitry Andric 
204fe6060f1SDimitry Andric   if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
205fe6060f1SDimitry Andric     return nullptr;
206fe6060f1SDimitry Andric   if (Succ0->getSingleSuccessor() == Succ1)
207fe6060f1SDimitry Andric     return Succ0;
208fe6060f1SDimitry Andric   if (Succ1->getSingleSuccessor() == Succ0)
209fe6060f1SDimitry Andric     return Succ1;
210fe6060f1SDimitry Andric   return nullptr;
211fe6060f1SDimitry Andric }
212fe6060f1SDimitry Andric 
21306c3fb27SDimitry Andric // Merge replicate regions in their successor region, if a replicate region
21406c3fb27SDimitry Andric // is connected to a successor replicate region with the same predicate by a
21506c3fb27SDimitry Andric // single, empty VPBasicBlock.
21606c3fb27SDimitry Andric static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
217fe6060f1SDimitry Andric   SetVector<VPRegionBlock *> DeletedRegions;
218fe6060f1SDimitry Andric 
219bdd1243dSDimitry Andric   // Collect replicate regions followed by an empty block, followed by another
220bdd1243dSDimitry Andric   // replicate region with matching masks to process front. This is to avoid
221bdd1243dSDimitry Andric   // iterator invalidation issues while merging regions.
222bdd1243dSDimitry Andric   SmallVector<VPRegionBlock *, 8> WorkList;
223bdd1243dSDimitry Andric   for (VPRegionBlock *Region1 : VPBlockUtils::blocksOnly<VPRegionBlock>(
224bdd1243dSDimitry Andric            vp_depth_first_deep(Plan.getEntry()))) {
225bdd1243dSDimitry Andric     if (!Region1->isReplicator())
226fe6060f1SDimitry Andric       continue;
227fe6060f1SDimitry Andric     auto *MiddleBasicBlock =
228fe6060f1SDimitry Andric         dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor());
229fe6060f1SDimitry Andric     if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
230fe6060f1SDimitry Andric       continue;
231fe6060f1SDimitry Andric 
232fe6060f1SDimitry Andric     auto *Region2 =
233fe6060f1SDimitry Andric         dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
234bdd1243dSDimitry Andric     if (!Region2 || !Region2->isReplicator())
235fe6060f1SDimitry Andric       continue;
236fe6060f1SDimitry Andric 
237fe6060f1SDimitry Andric     VPValue *Mask1 = getPredicatedMask(Region1);
238fe6060f1SDimitry Andric     VPValue *Mask2 = getPredicatedMask(Region2);
239fe6060f1SDimitry Andric     if (!Mask1 || Mask1 != Mask2)
240fe6060f1SDimitry Andric       continue;
241bdd1243dSDimitry Andric 
242bdd1243dSDimitry Andric     assert(Mask1 && Mask2 && "both region must have conditions");
243bdd1243dSDimitry Andric     WorkList.push_back(Region1);
244bdd1243dSDimitry Andric   }
245bdd1243dSDimitry Andric 
246bdd1243dSDimitry Andric   // Move recipes from Region1 to its successor region, if both are triangles.
247bdd1243dSDimitry Andric   for (VPRegionBlock *Region1 : WorkList) {
248bdd1243dSDimitry Andric     if (DeletedRegions.contains(Region1))
249bdd1243dSDimitry Andric       continue;
250bdd1243dSDimitry Andric     auto *MiddleBasicBlock = cast<VPBasicBlock>(Region1->getSingleSuccessor());
251bdd1243dSDimitry Andric     auto *Region2 = cast<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
252bdd1243dSDimitry Andric 
253fe6060f1SDimitry Andric     VPBasicBlock *Then1 = getPredicatedThenBlock(Region1);
254fe6060f1SDimitry Andric     VPBasicBlock *Then2 = getPredicatedThenBlock(Region2);
255fe6060f1SDimitry Andric     if (!Then1 || !Then2)
256fe6060f1SDimitry Andric       continue;
257fe6060f1SDimitry Andric 
258fe6060f1SDimitry Andric     // Note: No fusion-preventing memory dependencies are expected in either
259fe6060f1SDimitry Andric     // region. Such dependencies should be rejected during earlier dependence
260fe6060f1SDimitry Andric     // checks, which guarantee accesses can be re-ordered for vectorization.
261fe6060f1SDimitry Andric     //
262fe6060f1SDimitry Andric     // Move recipes to the successor region.
263fe6060f1SDimitry Andric     for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1)))
264fe6060f1SDimitry Andric       ToMove.moveBefore(*Then2, Then2->getFirstNonPhi());
265fe6060f1SDimitry Andric 
266fe6060f1SDimitry Andric     auto *Merge1 = cast<VPBasicBlock>(Then1->getSingleSuccessor());
267fe6060f1SDimitry Andric     auto *Merge2 = cast<VPBasicBlock>(Then2->getSingleSuccessor());
268fe6060f1SDimitry Andric 
269fe6060f1SDimitry Andric     // Move VPPredInstPHIRecipes from the merge block to the successor region's
270fe6060f1SDimitry Andric     // merge block. Update all users inside the successor region to use the
271fe6060f1SDimitry Andric     // original values.
272fe6060f1SDimitry Andric     for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) {
273fe6060f1SDimitry Andric       VPValue *PredInst1 =
274fe6060f1SDimitry Andric           cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
2758c6f6c0cSDimitry Andric       VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
2765f757f3fSDimitry Andric       Phi1ToMoveV->replaceUsesWithIf(PredInst1, [Then2](VPUser &U, unsigned) {
2775f757f3fSDimitry Andric         auto *UI = dyn_cast<VPRecipeBase>(&U);
2785f757f3fSDimitry Andric         return UI && UI->getParent() == Then2;
2795f757f3fSDimitry Andric       });
280fe6060f1SDimitry Andric 
281*0fca6ea1SDimitry Andric       // Remove phi recipes that are unused after merging the regions.
282*0fca6ea1SDimitry Andric       if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
283*0fca6ea1SDimitry Andric         Phi1ToMove.eraseFromParent();
284*0fca6ea1SDimitry Andric         continue;
285*0fca6ea1SDimitry Andric       }
286fe6060f1SDimitry Andric       Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
287fe6060f1SDimitry Andric     }
288fe6060f1SDimitry Andric 
289fe6060f1SDimitry Andric     // Finally, remove the first region.
290fe6060f1SDimitry Andric     for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) {
291fe6060f1SDimitry Andric       VPBlockUtils::disconnectBlocks(Pred, Region1);
292fe6060f1SDimitry Andric       VPBlockUtils::connectBlocks(Pred, MiddleBasicBlock);
293fe6060f1SDimitry Andric     }
294fe6060f1SDimitry Andric     VPBlockUtils::disconnectBlocks(Region1, MiddleBasicBlock);
295fe6060f1SDimitry Andric     DeletedRegions.insert(Region1);
296fe6060f1SDimitry Andric   }
297fe6060f1SDimitry Andric 
298fe6060f1SDimitry Andric   for (VPRegionBlock *ToDelete : DeletedRegions)
299fe6060f1SDimitry Andric     delete ToDelete;
300bdd1243dSDimitry Andric   return !DeletedRegions.empty();
301bdd1243dSDimitry Andric }
302bdd1243dSDimitry Andric 
30306c3fb27SDimitry Andric static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
30406c3fb27SDimitry Andric                                             VPlan &Plan) {
30506c3fb27SDimitry Andric   Instruction *Instr = PredRecipe->getUnderlyingInstr();
30606c3fb27SDimitry Andric   // Build the triangular if-then region.
30706c3fb27SDimitry Andric   std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
30806c3fb27SDimitry Andric   assert(Instr->getParent() && "Predicated instruction not in any basic block");
30906c3fb27SDimitry Andric   auto *BlockInMask = PredRecipe->getMask();
31006c3fb27SDimitry Andric   auto *BOMRecipe = new VPBranchOnMaskRecipe(BlockInMask);
31106c3fb27SDimitry Andric   auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe);
31206c3fb27SDimitry Andric 
31306c3fb27SDimitry Andric   // Replace predicated replicate recipe with a replicate recipe without a
31406c3fb27SDimitry Andric   // mask but in the replicate region.
31506c3fb27SDimitry Andric   auto *RecipeWithoutMask = new VPReplicateRecipe(
31606c3fb27SDimitry Andric       PredRecipe->getUnderlyingInstr(),
31706c3fb27SDimitry Andric       make_range(PredRecipe->op_begin(), std::prev(PredRecipe->op_end())),
31806c3fb27SDimitry Andric       PredRecipe->isUniform());
31906c3fb27SDimitry Andric   auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask);
32006c3fb27SDimitry Andric 
32106c3fb27SDimitry Andric   VPPredInstPHIRecipe *PHIRecipe = nullptr;
32206c3fb27SDimitry Andric   if (PredRecipe->getNumUsers() != 0) {
32306c3fb27SDimitry Andric     PHIRecipe = new VPPredInstPHIRecipe(RecipeWithoutMask);
32406c3fb27SDimitry Andric     PredRecipe->replaceAllUsesWith(PHIRecipe);
32506c3fb27SDimitry Andric     PHIRecipe->setOperand(0, RecipeWithoutMask);
32606c3fb27SDimitry Andric   }
32706c3fb27SDimitry Andric   PredRecipe->eraseFromParent();
32806c3fb27SDimitry Andric   auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
32906c3fb27SDimitry Andric   VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true);
33006c3fb27SDimitry Andric 
33106c3fb27SDimitry Andric   // Note: first set Entry as region entry and then connect successors starting
33206c3fb27SDimitry Andric   // from it in order, to propagate the "parent" of each VPBasicBlock.
33306c3fb27SDimitry Andric   VPBlockUtils::insertTwoBlocksAfter(Pred, Exiting, Entry);
33406c3fb27SDimitry Andric   VPBlockUtils::connectBlocks(Pred, Exiting);
33506c3fb27SDimitry Andric 
33606c3fb27SDimitry Andric   return Region;
33706c3fb27SDimitry Andric }
33806c3fb27SDimitry Andric 
33906c3fb27SDimitry Andric static void addReplicateRegions(VPlan &Plan) {
34006c3fb27SDimitry Andric   SmallVector<VPReplicateRecipe *> WorkList;
34106c3fb27SDimitry Andric   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
34206c3fb27SDimitry Andric            vp_depth_first_deep(Plan.getEntry()))) {
34306c3fb27SDimitry Andric     for (VPRecipeBase &R : *VPBB)
34406c3fb27SDimitry Andric       if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
34506c3fb27SDimitry Andric         if (RepR->isPredicated())
34606c3fb27SDimitry Andric           WorkList.push_back(RepR);
34706c3fb27SDimitry Andric       }
34806c3fb27SDimitry Andric   }
34906c3fb27SDimitry Andric 
35006c3fb27SDimitry Andric   unsigned BBNum = 0;
35106c3fb27SDimitry Andric   for (VPReplicateRecipe *RepR : WorkList) {
35206c3fb27SDimitry Andric     VPBasicBlock *CurrentBlock = RepR->getParent();
35306c3fb27SDimitry Andric     VPBasicBlock *SplitBlock = CurrentBlock->splitAt(RepR->getIterator());
35406c3fb27SDimitry Andric 
35506c3fb27SDimitry Andric     BasicBlock *OrigBB = RepR->getUnderlyingInstr()->getParent();
35606c3fb27SDimitry Andric     SplitBlock->setName(
35706c3fb27SDimitry Andric         OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : "");
35806c3fb27SDimitry Andric     // Record predicated instructions for above packing optimizations.
35906c3fb27SDimitry Andric     VPBlockBase *Region = createReplicateRegion(RepR, Plan);
36006c3fb27SDimitry Andric     Region->setParent(CurrentBlock->getParent());
36106c3fb27SDimitry Andric     VPBlockUtils::disconnectBlocks(CurrentBlock, SplitBlock);
36206c3fb27SDimitry Andric     VPBlockUtils::connectBlocks(CurrentBlock, Region);
36306c3fb27SDimitry Andric     VPBlockUtils::connectBlocks(Region, SplitBlock);
36406c3fb27SDimitry Andric   }
36506c3fb27SDimitry Andric }
36606c3fb27SDimitry Andric 
367*0fca6ea1SDimitry Andric /// Remove redundant VPBasicBlocks by merging them into their predecessor if
368*0fca6ea1SDimitry Andric /// the predecessor has a single successor.
369*0fca6ea1SDimitry Andric static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
370bdd1243dSDimitry Andric   SmallVector<VPBasicBlock *> WorkList;
371bdd1243dSDimitry Andric   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
372bdd1243dSDimitry Andric            vp_depth_first_deep(Plan.getEntry()))) {
373*0fca6ea1SDimitry Andric     // Don't fold the exit block of the Plan into its single predecessor for
374*0fca6ea1SDimitry Andric     // now.
375*0fca6ea1SDimitry Andric     // TODO: Remove restriction once more of the skeleton is modeled in VPlan.
376*0fca6ea1SDimitry Andric     if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent())
377*0fca6ea1SDimitry Andric       continue;
378bdd1243dSDimitry Andric     auto *PredVPBB =
379bdd1243dSDimitry Andric         dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());
380*0fca6ea1SDimitry Andric     if (!PredVPBB || PredVPBB->getNumSuccessors() != 1)
381*0fca6ea1SDimitry Andric       continue;
382bdd1243dSDimitry Andric     WorkList.push_back(VPBB);
383bdd1243dSDimitry Andric   }
384bdd1243dSDimitry Andric 
385bdd1243dSDimitry Andric   for (VPBasicBlock *VPBB : WorkList) {
386bdd1243dSDimitry Andric     VPBasicBlock *PredVPBB = cast<VPBasicBlock>(VPBB->getSinglePredecessor());
387bdd1243dSDimitry Andric     for (VPRecipeBase &R : make_early_inc_range(*VPBB))
388bdd1243dSDimitry Andric       R.moveBefore(*PredVPBB, PredVPBB->end());
389bdd1243dSDimitry Andric     VPBlockUtils::disconnectBlocks(PredVPBB, VPBB);
390bdd1243dSDimitry Andric     auto *ParentRegion = cast_or_null<VPRegionBlock>(VPBB->getParent());
391bdd1243dSDimitry Andric     if (ParentRegion && ParentRegion->getExiting() == VPBB)
392bdd1243dSDimitry Andric       ParentRegion->setExiting(PredVPBB);
393bdd1243dSDimitry Andric     for (auto *Succ : to_vector(VPBB->successors())) {
394bdd1243dSDimitry Andric       VPBlockUtils::disconnectBlocks(VPBB, Succ);
395bdd1243dSDimitry Andric       VPBlockUtils::connectBlocks(PredVPBB, Succ);
396bdd1243dSDimitry Andric     }
397bdd1243dSDimitry Andric     delete VPBB;
398bdd1243dSDimitry Andric   }
399bdd1243dSDimitry Andric   return !WorkList.empty();
400fe6060f1SDimitry Andric }
4010eae32dcSDimitry Andric 
402*0fca6ea1SDimitry Andric void VPlanTransforms::createAndOptimizeReplicateRegions(VPlan &Plan) {
403*0fca6ea1SDimitry Andric   // Convert masked VPReplicateRecipes to if-then region blocks.
404*0fca6ea1SDimitry Andric   addReplicateRegions(Plan);
405*0fca6ea1SDimitry Andric 
406*0fca6ea1SDimitry Andric   bool ShouldSimplify = true;
407*0fca6ea1SDimitry Andric   while (ShouldSimplify) {
408*0fca6ea1SDimitry Andric     ShouldSimplify = sinkScalarOperands(Plan);
409*0fca6ea1SDimitry Andric     ShouldSimplify |= mergeReplicateRegionsIntoSuccessors(Plan);
410*0fca6ea1SDimitry Andric     ShouldSimplify |= mergeBlocksIntoPredecessors(Plan);
411*0fca6ea1SDimitry Andric   }
412*0fca6ea1SDimitry Andric }
413*0fca6ea1SDimitry Andric 
414*0fca6ea1SDimitry Andric /// Remove redundant casts of inductions.
415*0fca6ea1SDimitry Andric ///
416*0fca6ea1SDimitry Andric /// Such redundant casts are casts of induction variables that can be ignored,
417*0fca6ea1SDimitry Andric /// because we already proved that the casted phi is equal to the uncasted phi
418*0fca6ea1SDimitry Andric /// in the vectorized loop. There is no need to vectorize the cast - the same
419*0fca6ea1SDimitry Andric /// value can be used for both the phi and casts in the vector loop.
420*0fca6ea1SDimitry Andric static void removeRedundantInductionCasts(VPlan &Plan) {
42181ad6265SDimitry Andric   for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
4220eae32dcSDimitry Andric     auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
4230eae32dcSDimitry Andric     if (!IV || IV->getTruncInst())
4240eae32dcSDimitry Andric       continue;
4250eae32dcSDimitry Andric 
42681ad6265SDimitry Andric     // A sequence of IR Casts has potentially been recorded for IV, which
42781ad6265SDimitry Andric     // *must be bypassed* when the IV is vectorized, because the vectorized IV
42881ad6265SDimitry Andric     // will produce the desired casted value. This sequence forms a def-use
42981ad6265SDimitry Andric     // chain and is provided in reverse order, ending with the cast that uses
43081ad6265SDimitry Andric     // the IV phi. Search for the recipe of the last cast in the chain and
43181ad6265SDimitry Andric     // replace it with the original IV. Note that only the final cast is
43281ad6265SDimitry Andric     // expected to have users outside the cast-chain and the dead casts left
43381ad6265SDimitry Andric     // over will be cleaned up later.
4340eae32dcSDimitry Andric     auto &Casts = IV->getInductionDescriptor().getCastInsts();
4350eae32dcSDimitry Andric     VPValue *FindMyCast = IV;
4360eae32dcSDimitry Andric     for (Instruction *IRCast : reverse(Casts)) {
4377a6dacacSDimitry Andric       VPSingleDefRecipe *FoundUserCast = nullptr;
4380eae32dcSDimitry Andric       for (auto *U : FindMyCast->users()) {
4397a6dacacSDimitry Andric         auto *UserCast = dyn_cast<VPSingleDefRecipe>(U);
4407a6dacacSDimitry Andric         if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
4410eae32dcSDimitry Andric           FoundUserCast = UserCast;
4420eae32dcSDimitry Andric           break;
4430eae32dcSDimitry Andric         }
4440eae32dcSDimitry Andric       }
4457a6dacacSDimitry Andric       FindMyCast = FoundUserCast;
4460eae32dcSDimitry Andric     }
44781ad6265SDimitry Andric     FindMyCast->replaceAllUsesWith(IV);
4480eae32dcSDimitry Andric   }
4490eae32dcSDimitry Andric }
45004eeddc0SDimitry Andric 
451*0fca6ea1SDimitry Andric /// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV
452*0fca6ea1SDimitry Andric /// recipe, if it exists.
453*0fca6ea1SDimitry Andric static void removeRedundantCanonicalIVs(VPlan &Plan) {
45404eeddc0SDimitry Andric   VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
45504eeddc0SDimitry Andric   VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
45604eeddc0SDimitry Andric   for (VPUser *U : CanonicalIV->users()) {
45704eeddc0SDimitry Andric     WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
45804eeddc0SDimitry Andric     if (WidenNewIV)
45904eeddc0SDimitry Andric       break;
46004eeddc0SDimitry Andric   }
46104eeddc0SDimitry Andric 
46204eeddc0SDimitry Andric   if (!WidenNewIV)
46304eeddc0SDimitry Andric     return;
46404eeddc0SDimitry Andric 
46504eeddc0SDimitry Andric   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
46604eeddc0SDimitry Andric   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
46704eeddc0SDimitry Andric     auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
46804eeddc0SDimitry Andric 
469*0fca6ea1SDimitry Andric     if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
4701fd87a68SDimitry Andric       continue;
4711fd87a68SDimitry Andric 
4721fd87a68SDimitry Andric     // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides
4731fd87a68SDimitry Andric     // everything WidenNewIV's users need. That is, WidenOriginalIV will
4741fd87a68SDimitry Andric     // generate a vector phi or all users of WidenNewIV demand the first lane
4751fd87a68SDimitry Andric     // only.
47606c3fb27SDimitry Andric     if (any_of(WidenOriginalIV->users(),
47706c3fb27SDimitry Andric                [WidenOriginalIV](VPUser *U) {
47806c3fb27SDimitry Andric                  return !U->usesScalars(WidenOriginalIV);
47906c3fb27SDimitry Andric                }) ||
4801fd87a68SDimitry Andric         vputils::onlyFirstLaneUsed(WidenNewIV)) {
48104eeddc0SDimitry Andric       WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
48204eeddc0SDimitry Andric       WidenNewIV->eraseFromParent();
48304eeddc0SDimitry Andric       return;
48404eeddc0SDimitry Andric     }
48504eeddc0SDimitry Andric   }
48604eeddc0SDimitry Andric }
48781ad6265SDimitry Andric 
488*0fca6ea1SDimitry Andric /// Returns true if \p R is dead and can be removed.
489*0fca6ea1SDimitry Andric static bool isDeadRecipe(VPRecipeBase &R) {
490*0fca6ea1SDimitry Andric   using namespace llvm::PatternMatch;
491*0fca6ea1SDimitry Andric   // Do remove conditional assume instructions as their conditions may be
492*0fca6ea1SDimitry Andric   // flattened.
493*0fca6ea1SDimitry Andric   auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
494*0fca6ea1SDimitry Andric   bool IsConditionalAssume =
495*0fca6ea1SDimitry Andric       RepR && RepR->isPredicated() &&
496*0fca6ea1SDimitry Andric       match(RepR->getUnderlyingInstr(), m_Intrinsic<Intrinsic::assume>());
497*0fca6ea1SDimitry Andric   if (IsConditionalAssume)
498*0fca6ea1SDimitry Andric     return true;
499*0fca6ea1SDimitry Andric 
500*0fca6ea1SDimitry Andric   if (R.mayHaveSideEffects())
501*0fca6ea1SDimitry Andric     return false;
502*0fca6ea1SDimitry Andric 
503*0fca6ea1SDimitry Andric   // Recipe is dead if no user keeps the recipe alive.
504*0fca6ea1SDimitry Andric   return all_of(R.definedValues(),
505*0fca6ea1SDimitry Andric                 [](VPValue *V) { return V->getNumUsers() == 0; });
506*0fca6ea1SDimitry Andric }
507*0fca6ea1SDimitry Andric 
508*0fca6ea1SDimitry Andric static void removeDeadRecipes(VPlan &Plan) {
509bdd1243dSDimitry Andric   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
510bdd1243dSDimitry Andric       Plan.getEntry());
51181ad6265SDimitry Andric 
51281ad6265SDimitry Andric   for (VPBasicBlock *VPBB : reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) {
51381ad6265SDimitry Andric     // The recipes in the block are processed in reverse order, to catch chains
51481ad6265SDimitry Andric     // of dead recipes.
51581ad6265SDimitry Andric     for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
516*0fca6ea1SDimitry Andric       if (isDeadRecipe(R))
51781ad6265SDimitry Andric         R.eraseFromParent();
51881ad6265SDimitry Andric     }
51981ad6265SDimitry Andric   }
52081ad6265SDimitry Andric }
52181ad6265SDimitry Andric 
522*0fca6ea1SDimitry Andric static VPScalarIVStepsRecipe *
523*0fca6ea1SDimitry Andric createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
524*0fca6ea1SDimitry Andric                     Instruction::BinaryOps InductionOpcode,
525*0fca6ea1SDimitry Andric                     FPMathOperator *FPBinOp, ScalarEvolution &SE,
526*0fca6ea1SDimitry Andric                     Instruction *TruncI, VPValue *StartV, VPValue *Step,
527*0fca6ea1SDimitry Andric                     VPBasicBlock::iterator IP) {
5285f757f3fSDimitry Andric   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
5295f757f3fSDimitry Andric   VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
530*0fca6ea1SDimitry Andric   VPSingleDefRecipe *BaseIV = CanonicalIV;
531*0fca6ea1SDimitry Andric   if (!CanonicalIV->isCanonical(Kind, StartV, Step)) {
532*0fca6ea1SDimitry Andric     BaseIV = new VPDerivedIVRecipe(Kind, FPBinOp, StartV, CanonicalIV, Step);
533*0fca6ea1SDimitry Andric     HeaderVPBB->insert(BaseIV, IP);
5345f757f3fSDimitry Andric   }
5355f757f3fSDimitry Andric 
536*0fca6ea1SDimitry Andric   // Truncate base induction if needed.
537*0fca6ea1SDimitry Andric   VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(),
538*0fca6ea1SDimitry Andric                           SE.getContext());
539*0fca6ea1SDimitry Andric   Type *ResultTy = TypeInfo.inferScalarType(BaseIV);
540*0fca6ea1SDimitry Andric   if (TruncI) {
541*0fca6ea1SDimitry Andric     Type *TruncTy = TruncI->getType();
542*0fca6ea1SDimitry Andric     assert(ResultTy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits() &&
543*0fca6ea1SDimitry Andric            "Not truncating.");
544*0fca6ea1SDimitry Andric     assert(ResultTy->isIntegerTy() && "Truncation requires an integer type");
545*0fca6ea1SDimitry Andric     BaseIV = new VPScalarCastRecipe(Instruction::Trunc, BaseIV, TruncTy);
546*0fca6ea1SDimitry Andric     HeaderVPBB->insert(BaseIV, IP);
547*0fca6ea1SDimitry Andric     ResultTy = TruncTy;
548*0fca6ea1SDimitry Andric   }
549*0fca6ea1SDimitry Andric 
550*0fca6ea1SDimitry Andric   // Truncate step if needed.
551*0fca6ea1SDimitry Andric   Type *StepTy = TypeInfo.inferScalarType(Step);
552*0fca6ea1SDimitry Andric   if (ResultTy != StepTy) {
553*0fca6ea1SDimitry Andric     assert(StepTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits() &&
554*0fca6ea1SDimitry Andric            "Not truncating.");
555*0fca6ea1SDimitry Andric     assert(StepTy->isIntegerTy() && "Truncation requires an integer type");
556*0fca6ea1SDimitry Andric     Step = new VPScalarCastRecipe(Instruction::Trunc, Step, ResultTy);
557*0fca6ea1SDimitry Andric     auto *VecPreheader =
558*0fca6ea1SDimitry Andric         cast<VPBasicBlock>(HeaderVPBB->getSingleHierarchicalPredecessor());
559*0fca6ea1SDimitry Andric     VecPreheader->appendRecipe(Step->getDefiningRecipe());
560*0fca6ea1SDimitry Andric   }
561*0fca6ea1SDimitry Andric 
562*0fca6ea1SDimitry Andric   VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(
563*0fca6ea1SDimitry Andric       BaseIV, Step, InductionOpcode,
564*0fca6ea1SDimitry Andric       FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags());
5655f757f3fSDimitry Andric   HeaderVPBB->insert(Steps, IP);
5665f757f3fSDimitry Andric   return Steps;
5675f757f3fSDimitry Andric }
5685f757f3fSDimitry Andric 
569*0fca6ea1SDimitry Andric /// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd
570*0fca6ea1SDimitry Andric /// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as
571*0fca6ea1SDimitry Andric /// VPWidenPointerInductionRecipe will generate vectors only. If some users
572*0fca6ea1SDimitry Andric /// require vectors while other require scalars, the scalar uses need to extract
573*0fca6ea1SDimitry Andric /// the scalars from the generated vectors (Note that this is different to how
574*0fca6ea1SDimitry Andric /// int/fp inductions are handled). Also optimize VPWidenIntOrFpInductionRecipe,
575*0fca6ea1SDimitry Andric /// if any of its users needs scalar values, by providing them scalar steps
576*0fca6ea1SDimitry Andric /// built on the canonical scalar IV and update the original IV's users. This is
577*0fca6ea1SDimitry Andric /// an optional optimization to reduce the needs of vector extracts.
578*0fca6ea1SDimitry Andric static void legalizeAndOptimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
57981ad6265SDimitry Andric   SmallVector<VPRecipeBase *> ToRemove;
58081ad6265SDimitry Andric   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
58181ad6265SDimitry Andric   bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
582*0fca6ea1SDimitry Andric   VPBasicBlock::iterator InsertPt = HeaderVPBB->getFirstNonPhi();
58381ad6265SDimitry Andric   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
584*0fca6ea1SDimitry Andric     // Replace wide pointer inductions which have only their scalars used by
585*0fca6ea1SDimitry Andric     // PtrAdd(IndStart, ScalarIVSteps (0, Step)).
586*0fca6ea1SDimitry Andric     if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
587*0fca6ea1SDimitry Andric       if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF()))
588*0fca6ea1SDimitry Andric         continue;
589*0fca6ea1SDimitry Andric 
590*0fca6ea1SDimitry Andric       const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
591*0fca6ea1SDimitry Andric       VPValue *StartV =
592*0fca6ea1SDimitry Andric           Plan.getOrAddLiveIn(ConstantInt::get(ID.getStep()->getType(), 0));
593*0fca6ea1SDimitry Andric       VPValue *StepV = PtrIV->getOperand(1);
594*0fca6ea1SDimitry Andric       VPScalarIVStepsRecipe *Steps = createScalarIVSteps(
595*0fca6ea1SDimitry Andric           Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr,
596*0fca6ea1SDimitry Andric           SE, nullptr, StartV, StepV, InsertPt);
597*0fca6ea1SDimitry Andric 
598*0fca6ea1SDimitry Andric       auto *Recipe = new VPInstruction(VPInstruction::PtrAdd,
599*0fca6ea1SDimitry Andric                                        {PtrIV->getStartValue(), Steps},
600*0fca6ea1SDimitry Andric                                        PtrIV->getDebugLoc(), "next.gep");
601*0fca6ea1SDimitry Andric 
602*0fca6ea1SDimitry Andric       Recipe->insertAfter(Steps);
603*0fca6ea1SDimitry Andric       PtrIV->replaceAllUsesWith(Recipe);
604*0fca6ea1SDimitry Andric       continue;
605*0fca6ea1SDimitry Andric     }
606*0fca6ea1SDimitry Andric 
607*0fca6ea1SDimitry Andric     // Replace widened induction with scalar steps for users that only use
608*0fca6ea1SDimitry Andric     // scalars.
609bdd1243dSDimitry Andric     auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
610bdd1243dSDimitry Andric     if (!WideIV)
61181ad6265SDimitry Andric       continue;
612bdd1243dSDimitry Andric     if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
613bdd1243dSDimitry Andric           return U->usesScalars(WideIV);
614bdd1243dSDimitry Andric         }))
61581ad6265SDimitry Andric       continue;
61681ad6265SDimitry Andric 
617bdd1243dSDimitry Andric     const InductionDescriptor &ID = WideIV->getInductionDescriptor();
618*0fca6ea1SDimitry Andric     VPScalarIVStepsRecipe *Steps = createScalarIVSteps(
619*0fca6ea1SDimitry Andric         Plan, ID.getKind(), ID.getInductionOpcode(),
620*0fca6ea1SDimitry Andric         dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()), SE,
621*0fca6ea1SDimitry Andric         WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
622*0fca6ea1SDimitry Andric         InsertPt);
623bdd1243dSDimitry Andric 
6245f757f3fSDimitry Andric     // Update scalar users of IV to use Step instead.
6255f757f3fSDimitry Andric     if (!HasOnlyVectorVFs)
6265f757f3fSDimitry Andric       WideIV->replaceAllUsesWith(Steps);
6275f757f3fSDimitry Andric     else
6285f757f3fSDimitry Andric       WideIV->replaceUsesWithIf(Steps, [WideIV](VPUser &U, unsigned) {
6295f757f3fSDimitry Andric         return U.usesScalars(WideIV);
6305f757f3fSDimitry Andric       });
63181ad6265SDimitry Andric   }
63281ad6265SDimitry Andric }
63381ad6265SDimitry Andric 
634*0fca6ea1SDimitry Andric /// Remove redundant EpxandSCEVRecipes in \p Plan's entry block by replacing
635*0fca6ea1SDimitry Andric /// them with already existing recipes expanding the same SCEV expression.
636*0fca6ea1SDimitry Andric static void removeRedundantExpandSCEVRecipes(VPlan &Plan) {
63781ad6265SDimitry Andric   DenseMap<const SCEV *, VPValue *> SCEV2VPV;
63881ad6265SDimitry Andric 
63981ad6265SDimitry Andric   for (VPRecipeBase &R :
64081ad6265SDimitry Andric        make_early_inc_range(*Plan.getEntry()->getEntryBasicBlock())) {
64181ad6265SDimitry Andric     auto *ExpR = dyn_cast<VPExpandSCEVRecipe>(&R);
64281ad6265SDimitry Andric     if (!ExpR)
64381ad6265SDimitry Andric       continue;
64481ad6265SDimitry Andric 
64581ad6265SDimitry Andric     auto I = SCEV2VPV.insert({ExpR->getSCEV(), ExpR});
64681ad6265SDimitry Andric     if (I.second)
64781ad6265SDimitry Andric       continue;
64881ad6265SDimitry Andric     ExpR->replaceAllUsesWith(I.first->second);
64981ad6265SDimitry Andric     ExpR->eraseFromParent();
65081ad6265SDimitry Andric   }
65181ad6265SDimitry Andric }
652bdd1243dSDimitry Andric 
653*0fca6ea1SDimitry Andric static void recursivelyDeleteDeadRecipes(VPValue *V) {
654*0fca6ea1SDimitry Andric   SmallVector<VPValue *> WorkList;
655*0fca6ea1SDimitry Andric   SmallPtrSet<VPValue *, 8> Seen;
656*0fca6ea1SDimitry Andric   WorkList.push_back(V);
657bdd1243dSDimitry Andric 
658*0fca6ea1SDimitry Andric   while (!WorkList.empty()) {
659*0fca6ea1SDimitry Andric     VPValue *Cur = WorkList.pop_back_val();
660*0fca6ea1SDimitry Andric     if (!Seen.insert(Cur).second)
661*0fca6ea1SDimitry Andric       continue;
662*0fca6ea1SDimitry Andric     VPRecipeBase *R = Cur->getDefiningRecipe();
663*0fca6ea1SDimitry Andric     if (!R)
664*0fca6ea1SDimitry Andric       continue;
665*0fca6ea1SDimitry Andric     if (!isDeadRecipe(*R))
666*0fca6ea1SDimitry Andric       continue;
667*0fca6ea1SDimitry Andric     WorkList.append(R->op_begin(), R->op_end());
668*0fca6ea1SDimitry Andric     R->eraseFromParent();
669*0fca6ea1SDimitry Andric   }
670bdd1243dSDimitry Andric }
671bdd1243dSDimitry Andric 
672bdd1243dSDimitry Andric void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
673bdd1243dSDimitry Andric                                          unsigned BestUF,
674bdd1243dSDimitry Andric                                          PredicatedScalarEvolution &PSE) {
675bdd1243dSDimitry Andric   assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
676bdd1243dSDimitry Andric   assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
677bdd1243dSDimitry Andric   VPBasicBlock *ExitingVPBB =
678bdd1243dSDimitry Andric       Plan.getVectorLoopRegion()->getExitingBasicBlock();
679*0fca6ea1SDimitry Andric   auto *Term = &ExitingVPBB->back();
680bdd1243dSDimitry Andric   // Try to simplify the branch condition if TC <= VF * UF when preparing to
681bdd1243dSDimitry Andric   // execute the plan for the main vector loop. We only do this if the
682bdd1243dSDimitry Andric   // terminator is:
683bdd1243dSDimitry Andric   //  1. BranchOnCount, or
684bdd1243dSDimitry Andric   //  2. BranchOnCond where the input is Not(ActiveLaneMask).
685*0fca6ea1SDimitry Andric   using namespace llvm::VPlanPatternMatch;
686*0fca6ea1SDimitry Andric   if (!match(Term, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
687*0fca6ea1SDimitry Andric       !match(Term,
688*0fca6ea1SDimitry Andric              m_BranchOnCond(m_Not(m_ActiveLaneMask(m_VPValue(), m_VPValue())))))
689bdd1243dSDimitry Andric     return;
690bdd1243dSDimitry Andric 
691bdd1243dSDimitry Andric   Type *IdxTy =
692bdd1243dSDimitry Andric       Plan.getCanonicalIV()->getStartValue()->getLiveInIRValue()->getType();
693bdd1243dSDimitry Andric   const SCEV *TripCount = createTripCountSCEV(IdxTy, PSE);
694bdd1243dSDimitry Andric   ScalarEvolution &SE = *PSE.getSE();
695*0fca6ea1SDimitry Andric   ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF);
696*0fca6ea1SDimitry Andric   const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
697bdd1243dSDimitry Andric   if (TripCount->isZero() ||
698bdd1243dSDimitry Andric       !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C))
699bdd1243dSDimitry Andric     return;
700bdd1243dSDimitry Andric 
701bdd1243dSDimitry Andric   LLVMContext &Ctx = SE.getContext();
702*0fca6ea1SDimitry Andric   auto *BOC =
703*0fca6ea1SDimitry Andric       new VPInstruction(VPInstruction::BranchOnCond,
704*0fca6ea1SDimitry Andric                         {Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))});
705*0fca6ea1SDimitry Andric 
706*0fca6ea1SDimitry Andric   SmallVector<VPValue *> PossiblyDead(Term->operands());
707bdd1243dSDimitry Andric   Term->eraseFromParent();
708*0fca6ea1SDimitry Andric   for (VPValue *Op : PossiblyDead)
709*0fca6ea1SDimitry Andric     recursivelyDeleteDeadRecipes(Op);
710bdd1243dSDimitry Andric   ExitingVPBB->appendRecipe(BOC);
711bdd1243dSDimitry Andric   Plan.setVF(BestVF);
712bdd1243dSDimitry Andric   Plan.setUF(BestUF);
713bdd1243dSDimitry Andric   // TODO: Further simplifications are possible
714bdd1243dSDimitry Andric   //      1. Replace inductions with constants.
715bdd1243dSDimitry Andric   //      2. Replace vector loop region with VPBasicBlock.
716bdd1243dSDimitry Andric }
71706c3fb27SDimitry Andric 
71806c3fb27SDimitry Andric #ifndef NDEBUG
71906c3fb27SDimitry Andric static VPRegionBlock *GetReplicateRegion(VPRecipeBase *R) {
72006c3fb27SDimitry Andric   auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
72106c3fb27SDimitry Andric   if (Region && Region->isReplicator()) {
72206c3fb27SDimitry Andric     assert(Region->getNumSuccessors() == 1 &&
72306c3fb27SDimitry Andric            Region->getNumPredecessors() == 1 && "Expected SESE region!");
72406c3fb27SDimitry Andric     assert(R->getParent()->size() == 1 &&
72506c3fb27SDimitry Andric            "A recipe in an original replicator region must be the only "
72606c3fb27SDimitry Andric            "recipe in its block");
72706c3fb27SDimitry Andric     return Region;
72806c3fb27SDimitry Andric   }
72906c3fb27SDimitry Andric   return nullptr;
73006c3fb27SDimitry Andric }
73106c3fb27SDimitry Andric #endif
73206c3fb27SDimitry Andric 
73306c3fb27SDimitry Andric static bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B,
73406c3fb27SDimitry Andric                               VPDominatorTree &VPDT) {
73506c3fb27SDimitry Andric   if (A == B)
73606c3fb27SDimitry Andric     return false;
73706c3fb27SDimitry Andric 
73806c3fb27SDimitry Andric   auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
73906c3fb27SDimitry Andric     for (auto &R : *A->getParent()) {
74006c3fb27SDimitry Andric       if (&R == A)
74106c3fb27SDimitry Andric         return true;
74206c3fb27SDimitry Andric       if (&R == B)
74306c3fb27SDimitry Andric         return false;
74406c3fb27SDimitry Andric     }
74506c3fb27SDimitry Andric     llvm_unreachable("recipe not found");
74606c3fb27SDimitry Andric   };
74706c3fb27SDimitry Andric   const VPBlockBase *ParentA = A->getParent();
74806c3fb27SDimitry Andric   const VPBlockBase *ParentB = B->getParent();
74906c3fb27SDimitry Andric   if (ParentA == ParentB)
75006c3fb27SDimitry Andric     return LocalComesBefore(A, B);
75106c3fb27SDimitry Andric 
75206c3fb27SDimitry Andric   assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
75306c3fb27SDimitry Andric          "No replicate regions expected at this point");
75406c3fb27SDimitry Andric   assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
75506c3fb27SDimitry Andric          "No replicate regions expected at this point");
75606c3fb27SDimitry Andric   return VPDT.properlyDominates(ParentA, ParentB);
75706c3fb27SDimitry Andric }
75806c3fb27SDimitry Andric 
75906c3fb27SDimitry Andric /// Sink users of \p FOR after the recipe defining the previous value \p
76006c3fb27SDimitry Andric /// Previous of the recurrence. \returns true if all users of \p FOR could be
76106c3fb27SDimitry Andric /// re-arranged as needed or false if it is not possible.
76206c3fb27SDimitry Andric static bool
76306c3fb27SDimitry Andric sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
76406c3fb27SDimitry Andric                                  VPRecipeBase *Previous,
76506c3fb27SDimitry Andric                                  VPDominatorTree &VPDT) {
76606c3fb27SDimitry Andric   // Collect recipes that need sinking.
76706c3fb27SDimitry Andric   SmallVector<VPRecipeBase *> WorkList;
76806c3fb27SDimitry Andric   SmallPtrSet<VPRecipeBase *, 8> Seen;
76906c3fb27SDimitry Andric   Seen.insert(Previous);
77006c3fb27SDimitry Andric   auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
77106c3fb27SDimitry Andric     // The previous value must not depend on the users of the recurrence phi. In
77206c3fb27SDimitry Andric     // that case, FOR is not a fixed order recurrence.
77306c3fb27SDimitry Andric     if (SinkCandidate == Previous)
77406c3fb27SDimitry Andric       return false;
77506c3fb27SDimitry Andric 
77606c3fb27SDimitry Andric     if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
77706c3fb27SDimitry Andric         !Seen.insert(SinkCandidate).second ||
77806c3fb27SDimitry Andric         properlyDominates(Previous, SinkCandidate, VPDT))
77906c3fb27SDimitry Andric       return true;
78006c3fb27SDimitry Andric 
78106c3fb27SDimitry Andric     if (SinkCandidate->mayHaveSideEffects())
78206c3fb27SDimitry Andric       return false;
78306c3fb27SDimitry Andric 
78406c3fb27SDimitry Andric     WorkList.push_back(SinkCandidate);
78506c3fb27SDimitry Andric     return true;
78606c3fb27SDimitry Andric   };
78706c3fb27SDimitry Andric 
78806c3fb27SDimitry Andric   // Recursively sink users of FOR after Previous.
78906c3fb27SDimitry Andric   WorkList.push_back(FOR);
79006c3fb27SDimitry Andric   for (unsigned I = 0; I != WorkList.size(); ++I) {
79106c3fb27SDimitry Andric     VPRecipeBase *Current = WorkList[I];
79206c3fb27SDimitry Andric     assert(Current->getNumDefinedValues() == 1 &&
79306c3fb27SDimitry Andric            "only recipes with a single defined value expected");
79406c3fb27SDimitry Andric 
79506c3fb27SDimitry Andric     for (VPUser *User : Current->getVPSingleValue()->users()) {
79606c3fb27SDimitry Andric       if (auto *R = dyn_cast<VPRecipeBase>(User))
79706c3fb27SDimitry Andric         if (!TryToPushSinkCandidate(R))
79806c3fb27SDimitry Andric           return false;
79906c3fb27SDimitry Andric     }
80006c3fb27SDimitry Andric   }
80106c3fb27SDimitry Andric 
80206c3fb27SDimitry Andric   // Keep recipes to sink ordered by dominance so earlier instructions are
80306c3fb27SDimitry Andric   // processed first.
80406c3fb27SDimitry Andric   sort(WorkList, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
80506c3fb27SDimitry Andric     return properlyDominates(A, B, VPDT);
80606c3fb27SDimitry Andric   });
80706c3fb27SDimitry Andric 
80806c3fb27SDimitry Andric   for (VPRecipeBase *SinkCandidate : WorkList) {
80906c3fb27SDimitry Andric     if (SinkCandidate == FOR)
81006c3fb27SDimitry Andric       continue;
81106c3fb27SDimitry Andric 
81206c3fb27SDimitry Andric     SinkCandidate->moveAfter(Previous);
81306c3fb27SDimitry Andric     Previous = SinkCandidate;
81406c3fb27SDimitry Andric   }
81506c3fb27SDimitry Andric   return true;
81606c3fb27SDimitry Andric }
81706c3fb27SDimitry Andric 
81806c3fb27SDimitry Andric bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
819*0fca6ea1SDimitry Andric                                                   VPBuilder &LoopBuilder) {
82006c3fb27SDimitry Andric   VPDominatorTree VPDT;
82106c3fb27SDimitry Andric   VPDT.recalculate(Plan);
82206c3fb27SDimitry Andric 
82306c3fb27SDimitry Andric   SmallVector<VPFirstOrderRecurrencePHIRecipe *> RecurrencePhis;
82406c3fb27SDimitry Andric   for (VPRecipeBase &R :
82506c3fb27SDimitry Andric        Plan.getVectorLoopRegion()->getEntry()->getEntryBasicBlock()->phis())
82606c3fb27SDimitry Andric     if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
82706c3fb27SDimitry Andric       RecurrencePhis.push_back(FOR);
82806c3fb27SDimitry Andric 
829*0fca6ea1SDimitry Andric   VPBasicBlock *MiddleVPBB =
830*0fca6ea1SDimitry Andric       cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
831*0fca6ea1SDimitry Andric   VPBuilder MiddleBuilder;
832*0fca6ea1SDimitry Andric   // Set insert point so new recipes are inserted before terminator and
833*0fca6ea1SDimitry Andric   // condition, if there is either the former or both.
834*0fca6ea1SDimitry Andric   if (auto *Term =
835*0fca6ea1SDimitry Andric           dyn_cast_or_null<VPInstruction>(MiddleVPBB->getTerminator())) {
836*0fca6ea1SDimitry Andric     if (auto *Cmp = dyn_cast<VPInstruction>(Term->getOperand(0)))
837*0fca6ea1SDimitry Andric       MiddleBuilder.setInsertPoint(Cmp);
838*0fca6ea1SDimitry Andric     else
839*0fca6ea1SDimitry Andric       MiddleBuilder.setInsertPoint(Term);
840*0fca6ea1SDimitry Andric   } else
841*0fca6ea1SDimitry Andric     MiddleBuilder.setInsertPoint(MiddleVPBB);
842*0fca6ea1SDimitry Andric 
84306c3fb27SDimitry Andric   for (VPFirstOrderRecurrencePHIRecipe *FOR : RecurrencePhis) {
84406c3fb27SDimitry Andric     SmallPtrSet<VPFirstOrderRecurrencePHIRecipe *, 4> SeenPhis;
84506c3fb27SDimitry Andric     VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
84606c3fb27SDimitry Andric     // Fixed-order recurrences do not contain cycles, so this loop is guaranteed
84706c3fb27SDimitry Andric     // to terminate.
84806c3fb27SDimitry Andric     while (auto *PrevPhi =
84906c3fb27SDimitry Andric                dyn_cast_or_null<VPFirstOrderRecurrencePHIRecipe>(Previous)) {
85006c3fb27SDimitry Andric       assert(PrevPhi->getParent() == FOR->getParent());
85106c3fb27SDimitry Andric       assert(SeenPhis.insert(PrevPhi).second);
85206c3fb27SDimitry Andric       Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
85306c3fb27SDimitry Andric     }
85406c3fb27SDimitry Andric 
85506c3fb27SDimitry Andric     if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT))
85606c3fb27SDimitry Andric       return false;
85706c3fb27SDimitry Andric 
85806c3fb27SDimitry Andric     // Introduce a recipe to combine the incoming and previous values of a
85906c3fb27SDimitry Andric     // fixed-order recurrence.
86006c3fb27SDimitry Andric     VPBasicBlock *InsertBlock = Previous->getParent();
86106c3fb27SDimitry Andric     if (isa<VPHeaderPHIRecipe>(Previous))
862*0fca6ea1SDimitry Andric       LoopBuilder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi());
86306c3fb27SDimitry Andric     else
864*0fca6ea1SDimitry Andric       LoopBuilder.setInsertPoint(InsertBlock,
865*0fca6ea1SDimitry Andric                                  std::next(Previous->getIterator()));
86606c3fb27SDimitry Andric 
86706c3fb27SDimitry Andric     auto *RecurSplice = cast<VPInstruction>(
868*0fca6ea1SDimitry Andric         LoopBuilder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice,
86906c3fb27SDimitry Andric                                  {FOR, FOR->getBackedgeValue()}));
87006c3fb27SDimitry Andric 
87106c3fb27SDimitry Andric     FOR->replaceAllUsesWith(RecurSplice);
87206c3fb27SDimitry Andric     // Set the first operand of RecurSplice to FOR again, after replacing
87306c3fb27SDimitry Andric     // all users.
87406c3fb27SDimitry Andric     RecurSplice->setOperand(0, FOR);
875*0fca6ea1SDimitry Andric 
876*0fca6ea1SDimitry Andric     // This is the second phase of vectorizing first-order recurrences. An
877*0fca6ea1SDimitry Andric     // overview of the transformation is described below. Suppose we have the
878*0fca6ea1SDimitry Andric     // following loop with some use after the loop of the last a[i-1],
879*0fca6ea1SDimitry Andric     //
880*0fca6ea1SDimitry Andric     //   for (int i = 0; i < n; ++i) {
881*0fca6ea1SDimitry Andric     //     t = a[i - 1];
882*0fca6ea1SDimitry Andric     //     b[i] = a[i] - t;
883*0fca6ea1SDimitry Andric     //   }
884*0fca6ea1SDimitry Andric     //   use t;
885*0fca6ea1SDimitry Andric     //
886*0fca6ea1SDimitry Andric     // There is a first-order recurrence on "a". For this loop, the shorthand
887*0fca6ea1SDimitry Andric     // scalar IR looks like:
888*0fca6ea1SDimitry Andric     //
889*0fca6ea1SDimitry Andric     //   scalar.ph:
890*0fca6ea1SDimitry Andric     //     s_init = a[-1]
891*0fca6ea1SDimitry Andric     //     br scalar.body
892*0fca6ea1SDimitry Andric     //
893*0fca6ea1SDimitry Andric     //   scalar.body:
894*0fca6ea1SDimitry Andric     //     i = phi [0, scalar.ph], [i+1, scalar.body]
895*0fca6ea1SDimitry Andric     //     s1 = phi [s_init, scalar.ph], [s2, scalar.body]
896*0fca6ea1SDimitry Andric     //     s2 = a[i]
897*0fca6ea1SDimitry Andric     //     b[i] = s2 - s1
898*0fca6ea1SDimitry Andric     //     br cond, scalar.body, exit.block
899*0fca6ea1SDimitry Andric     //
900*0fca6ea1SDimitry Andric     //   exit.block:
901*0fca6ea1SDimitry Andric     //     use = lcssa.phi [s1, scalar.body]
902*0fca6ea1SDimitry Andric     //
903*0fca6ea1SDimitry Andric     // In this example, s1 is a recurrence because it's value depends on the
904*0fca6ea1SDimitry Andric     // previous iteration. In the first phase of vectorization, we created a
905*0fca6ea1SDimitry Andric     // vector phi v1 for s1. We now complete the vectorization and produce the
906*0fca6ea1SDimitry Andric     // shorthand vector IR shown below (for VF = 4, UF = 1).
907*0fca6ea1SDimitry Andric     //
908*0fca6ea1SDimitry Andric     //   vector.ph:
909*0fca6ea1SDimitry Andric     //     v_init = vector(..., ..., ..., a[-1])
910*0fca6ea1SDimitry Andric     //     br vector.body
911*0fca6ea1SDimitry Andric     //
912*0fca6ea1SDimitry Andric     //   vector.body
913*0fca6ea1SDimitry Andric     //     i = phi [0, vector.ph], [i+4, vector.body]
914*0fca6ea1SDimitry Andric     //     v1 = phi [v_init, vector.ph], [v2, vector.body]
915*0fca6ea1SDimitry Andric     //     v2 = a[i, i+1, i+2, i+3];
916*0fca6ea1SDimitry Andric     //     v3 = vector(v1(3), v2(0, 1, 2))
917*0fca6ea1SDimitry Andric     //     b[i, i+1, i+2, i+3] = v2 - v3
918*0fca6ea1SDimitry Andric     //     br cond, vector.body, middle.block
919*0fca6ea1SDimitry Andric     //
920*0fca6ea1SDimitry Andric     //   middle.block:
921*0fca6ea1SDimitry Andric     //     s_penultimate = v2(2) = v3(3)
922*0fca6ea1SDimitry Andric     //     s_resume = v2(3)
923*0fca6ea1SDimitry Andric     //     br cond, scalar.ph, exit.block
924*0fca6ea1SDimitry Andric     //
925*0fca6ea1SDimitry Andric     //   scalar.ph:
926*0fca6ea1SDimitry Andric     //     s_init' = phi [s_resume, middle.block], [s_init, otherwise]
927*0fca6ea1SDimitry Andric     //     br scalar.body
928*0fca6ea1SDimitry Andric     //
929*0fca6ea1SDimitry Andric     //   scalar.body:
930*0fca6ea1SDimitry Andric     //     i = phi [0, scalar.ph], [i+1, scalar.body]
931*0fca6ea1SDimitry Andric     //     s1 = phi [s_init', scalar.ph], [s2, scalar.body]
932*0fca6ea1SDimitry Andric     //     s2 = a[i]
933*0fca6ea1SDimitry Andric     //     b[i] = s2 - s1
934*0fca6ea1SDimitry Andric     //     br cond, scalar.body, exit.block
935*0fca6ea1SDimitry Andric     //
936*0fca6ea1SDimitry Andric     //   exit.block:
937*0fca6ea1SDimitry Andric     //     lo = lcssa.phi [s1, scalar.body], [s.penultimate, middle.block]
938*0fca6ea1SDimitry Andric     //
939*0fca6ea1SDimitry Andric     // After execution completes the vector loop, we extract the next value of
940*0fca6ea1SDimitry Andric     // the recurrence (x) to use as the initial value in the scalar loop. This
941*0fca6ea1SDimitry Andric     // is modeled by ExtractFromEnd.
942*0fca6ea1SDimitry Andric     Type *IntTy = Plan.getCanonicalIV()->getScalarType();
943*0fca6ea1SDimitry Andric 
944*0fca6ea1SDimitry Andric     // Extract the penultimate value of the recurrence and update VPLiveOut
945*0fca6ea1SDimitry Andric     // users of the recurrence splice. Note that the extract of the final value
946*0fca6ea1SDimitry Andric     // used to resume in the scalar loop is created earlier during VPlan
947*0fca6ea1SDimitry Andric     // construction.
948*0fca6ea1SDimitry Andric     auto *Penultimate = cast<VPInstruction>(MiddleBuilder.createNaryOp(
949*0fca6ea1SDimitry Andric         VPInstruction::ExtractFromEnd,
950*0fca6ea1SDimitry Andric         {FOR->getBackedgeValue(),
951*0fca6ea1SDimitry Andric          Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 2))},
952*0fca6ea1SDimitry Andric         {}, "vector.recur.extract.for.phi"));
953*0fca6ea1SDimitry Andric     RecurSplice->replaceUsesWithIf(
954*0fca6ea1SDimitry Andric         Penultimate, [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); });
95506c3fb27SDimitry Andric   }
95606c3fb27SDimitry Andric   return true;
95706c3fb27SDimitry Andric }
95806c3fb27SDimitry Andric 
959*0fca6ea1SDimitry Andric static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
960*0fca6ea1SDimitry Andric   SetVector<VPUser *> Users(V->user_begin(), V->user_end());
961*0fca6ea1SDimitry Andric   for (unsigned I = 0; I != Users.size(); ++I) {
962*0fca6ea1SDimitry Andric     VPRecipeBase *Cur = dyn_cast<VPRecipeBase>(Users[I]);
963*0fca6ea1SDimitry Andric     if (!Cur || isa<VPHeaderPHIRecipe>(Cur))
964*0fca6ea1SDimitry Andric       continue;
965*0fca6ea1SDimitry Andric     for (VPValue *V : Cur->definedValues())
966*0fca6ea1SDimitry Andric       Users.insert(V->user_begin(), V->user_end());
967*0fca6ea1SDimitry Andric   }
968*0fca6ea1SDimitry Andric   return Users.takeVector();
969*0fca6ea1SDimitry Andric }
970*0fca6ea1SDimitry Andric 
97106c3fb27SDimitry Andric void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
97206c3fb27SDimitry Andric   for (VPRecipeBase &R :
97306c3fb27SDimitry Andric        Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
97406c3fb27SDimitry Andric     auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
97506c3fb27SDimitry Andric     if (!PhiR)
97606c3fb27SDimitry Andric       continue;
97706c3fb27SDimitry Andric     const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
97806c3fb27SDimitry Andric     RecurKind RK = RdxDesc.getRecurrenceKind();
97906c3fb27SDimitry Andric     if (RK != RecurKind::Add && RK != RecurKind::Mul)
98006c3fb27SDimitry Andric       continue;
98106c3fb27SDimitry Andric 
982*0fca6ea1SDimitry Andric     for (VPUser *U : collectUsersRecursively(PhiR))
983*0fca6ea1SDimitry Andric       if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(U)) {
98406c3fb27SDimitry Andric         RecWithFlags->dropPoisonGeneratingFlags();
98506c3fb27SDimitry Andric       }
98606c3fb27SDimitry Andric   }
98706c3fb27SDimitry Andric }
9885f757f3fSDimitry Andric 
9895f757f3fSDimitry Andric /// Try to simplify recipe \p R.
9905f757f3fSDimitry Andric static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
991*0fca6ea1SDimitry Andric   using namespace llvm::VPlanPatternMatch;
992*0fca6ea1SDimitry Andric   // Try to remove redundant blend recipes.
993*0fca6ea1SDimitry Andric   if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
994*0fca6ea1SDimitry Andric     VPValue *Inc0 = Blend->getIncomingValue(0);
995*0fca6ea1SDimitry Andric     for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
996*0fca6ea1SDimitry Andric       if (Inc0 != Blend->getIncomingValue(I) &&
997*0fca6ea1SDimitry Andric           !match(Blend->getMask(I), m_False()))
998*0fca6ea1SDimitry Andric         return;
999*0fca6ea1SDimitry Andric     Blend->replaceAllUsesWith(Inc0);
1000*0fca6ea1SDimitry Andric     Blend->eraseFromParent();
1001*0fca6ea1SDimitry Andric     return;
10025f757f3fSDimitry Andric   }
1003*0fca6ea1SDimitry Andric 
1004*0fca6ea1SDimitry Andric   VPValue *A;
1005*0fca6ea1SDimitry Andric   if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
10065f757f3fSDimitry Andric     VPValue *Trunc = R.getVPSingleValue();
10075f757f3fSDimitry Andric     Type *TruncTy = TypeInfo.inferScalarType(Trunc);
10085f757f3fSDimitry Andric     Type *ATy = TypeInfo.inferScalarType(A);
10095f757f3fSDimitry Andric     if (TruncTy == ATy) {
10105f757f3fSDimitry Andric       Trunc->replaceAllUsesWith(A);
10111db9f3b2SDimitry Andric     } else {
10121db9f3b2SDimitry Andric       // Don't replace a scalarizing recipe with a widened cast.
10131db9f3b2SDimitry Andric       if (isa<VPReplicateRecipe>(&R))
1014*0fca6ea1SDimitry Andric         return;
10151db9f3b2SDimitry Andric       if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
1016*0fca6ea1SDimitry Andric 
1017*0fca6ea1SDimitry Andric         unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
1018*0fca6ea1SDimitry Andric                                  ? Instruction::SExt
1019*0fca6ea1SDimitry Andric                                  : Instruction::ZExt;
10205f757f3fSDimitry Andric         auto *VPC =
10215f757f3fSDimitry Andric             new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
1022*0fca6ea1SDimitry Andric         if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
1023*0fca6ea1SDimitry Andric           // UnderlyingExt has distinct return type, used to retain legacy cost.
1024*0fca6ea1SDimitry Andric           VPC->setUnderlyingValue(UnderlyingExt);
1025*0fca6ea1SDimitry Andric         }
10265f757f3fSDimitry Andric         VPC->insertBefore(&R);
10275f757f3fSDimitry Andric         Trunc->replaceAllUsesWith(VPC);
10285f757f3fSDimitry Andric       } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
10295f757f3fSDimitry Andric         auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
10305f757f3fSDimitry Andric         VPC->insertBefore(&R);
10315f757f3fSDimitry Andric         Trunc->replaceAllUsesWith(VPC);
10325f757f3fSDimitry Andric       }
10331db9f3b2SDimitry Andric     }
10345f757f3fSDimitry Andric #ifndef NDEBUG
10355f757f3fSDimitry Andric     // Verify that the cached type info is for both A and its users is still
10365f757f3fSDimitry Andric     // accurate by comparing it to freshly computed types.
1037*0fca6ea1SDimitry Andric     VPTypeAnalysis TypeInfo2(
1038*0fca6ea1SDimitry Andric         R.getParent()->getPlan()->getCanonicalIV()->getScalarType(),
1039*0fca6ea1SDimitry Andric         TypeInfo.getContext());
10405f757f3fSDimitry Andric     assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
10415f757f3fSDimitry Andric     for (VPUser *U : A->users()) {
10425f757f3fSDimitry Andric       auto *R = dyn_cast<VPRecipeBase>(U);
10435f757f3fSDimitry Andric       if (!R)
10445f757f3fSDimitry Andric         continue;
10455f757f3fSDimitry Andric       for (VPValue *VPV : R->definedValues())
10465f757f3fSDimitry Andric         assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
10475f757f3fSDimitry Andric     }
10485f757f3fSDimitry Andric #endif
10495f757f3fSDimitry Andric   }
1050*0fca6ea1SDimitry Andric 
1051*0fca6ea1SDimitry Andric   // Simplify (X && Y) || (X && !Y) -> X.
1052*0fca6ea1SDimitry Andric   // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
1053*0fca6ea1SDimitry Andric   // && (Y || Z) and (X || !X) into true. This requires queuing newly created
1054*0fca6ea1SDimitry Andric   // recipes to be visited during simplification.
1055*0fca6ea1SDimitry Andric   VPValue *X, *Y, *X1, *Y1;
1056*0fca6ea1SDimitry Andric   if (match(&R,
1057*0fca6ea1SDimitry Andric             m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
1058*0fca6ea1SDimitry Andric                          m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
1059*0fca6ea1SDimitry Andric       X == X1 && Y == Y1) {
1060*0fca6ea1SDimitry Andric     R.getVPSingleValue()->replaceAllUsesWith(X);
1061*0fca6ea1SDimitry Andric     return;
10625f757f3fSDimitry Andric   }
1063*0fca6ea1SDimitry Andric 
1064*0fca6ea1SDimitry Andric   if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
1065*0fca6ea1SDimitry Andric     return R.getVPSingleValue()->replaceAllUsesWith(A);
10665f757f3fSDimitry Andric }
10675f757f3fSDimitry Andric 
10685f757f3fSDimitry Andric /// Try to simplify the recipes in \p Plan.
10695f757f3fSDimitry Andric static void simplifyRecipes(VPlan &Plan, LLVMContext &Ctx) {
10705f757f3fSDimitry Andric   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
10715f757f3fSDimitry Andric       Plan.getEntry());
1072*0fca6ea1SDimitry Andric   VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(), Ctx);
10735f757f3fSDimitry Andric   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
10745f757f3fSDimitry Andric     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
10755f757f3fSDimitry Andric       simplifyRecipe(R, TypeInfo);
10765f757f3fSDimitry Andric     }
10775f757f3fSDimitry Andric   }
10785f757f3fSDimitry Andric }
10795f757f3fSDimitry Andric 
10805f757f3fSDimitry Andric void VPlanTransforms::truncateToMinimalBitwidths(
10815f757f3fSDimitry Andric     VPlan &Plan, const MapVector<Instruction *, uint64_t> &MinBWs,
10825f757f3fSDimitry Andric     LLVMContext &Ctx) {
10835f757f3fSDimitry Andric #ifndef NDEBUG
10845f757f3fSDimitry Andric   // Count the processed recipes and cross check the count later with MinBWs
10855f757f3fSDimitry Andric   // size, to make sure all entries in MinBWs have been handled.
10865f757f3fSDimitry Andric   unsigned NumProcessedRecipes = 0;
10875f757f3fSDimitry Andric #endif
10885f757f3fSDimitry Andric   // Keep track of created truncates, so they can be re-used. Note that we
10895f757f3fSDimitry Andric   // cannot use RAUW after creating a new truncate, as this would could make
10905f757f3fSDimitry Andric   // other uses have different types for their operands, making them invalidly
10915f757f3fSDimitry Andric   // typed.
10925f757f3fSDimitry Andric   DenseMap<VPValue *, VPWidenCastRecipe *> ProcessedTruncs;
1093*0fca6ea1SDimitry Andric   VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(), Ctx);
10945f757f3fSDimitry Andric   VPBasicBlock *PH = Plan.getEntry();
10955f757f3fSDimitry Andric   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
10965f757f3fSDimitry Andric            vp_depth_first_deep(Plan.getVectorLoopRegion()))) {
10975f757f3fSDimitry Andric     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
10985f757f3fSDimitry Andric       if (!isa<VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe,
1099*0fca6ea1SDimitry Andric                VPWidenSelectRecipe, VPWidenLoadRecipe>(&R))
11005f757f3fSDimitry Andric         continue;
11015f757f3fSDimitry Andric 
11025f757f3fSDimitry Andric       VPValue *ResultVPV = R.getVPSingleValue();
11035f757f3fSDimitry Andric       auto *UI = cast_or_null<Instruction>(ResultVPV->getUnderlyingValue());
11045f757f3fSDimitry Andric       unsigned NewResSizeInBits = MinBWs.lookup(UI);
11055f757f3fSDimitry Andric       if (!NewResSizeInBits)
11065f757f3fSDimitry Andric         continue;
11075f757f3fSDimitry Andric 
11085f757f3fSDimitry Andric #ifndef NDEBUG
11095f757f3fSDimitry Andric       NumProcessedRecipes++;
11105f757f3fSDimitry Andric #endif
11115f757f3fSDimitry Andric       // If the value wasn't vectorized, we must maintain the original scalar
11125f757f3fSDimitry Andric       // type. Skip those here, after incrementing NumProcessedRecipes. Also
11135f757f3fSDimitry Andric       // skip casts which do not need to be handled explicitly here, as
11145f757f3fSDimitry Andric       // redundant casts will be removed during recipe simplification.
11155f757f3fSDimitry Andric       if (isa<VPReplicateRecipe, VPWidenCastRecipe>(&R)) {
11165f757f3fSDimitry Andric #ifndef NDEBUG
11175f757f3fSDimitry Andric         // If any of the operands is a live-in and not used by VPWidenRecipe or
11185f757f3fSDimitry Andric         // VPWidenSelectRecipe, but in MinBWs, make sure it is counted as
11195f757f3fSDimitry Andric         // processed as well. When MinBWs is currently constructed, there is no
11205f757f3fSDimitry Andric         // information about whether recipes are widened or replicated and in
11215f757f3fSDimitry Andric         // case they are reciplicated the operands are not truncated. Counting
11225f757f3fSDimitry Andric         // them them here ensures we do not miss any recipes in MinBWs.
11235f757f3fSDimitry Andric         // TODO: Remove once the analysis is done on VPlan.
11245f757f3fSDimitry Andric         for (VPValue *Op : R.operands()) {
11255f757f3fSDimitry Andric           if (!Op->isLiveIn())
11265f757f3fSDimitry Andric             continue;
11275f757f3fSDimitry Andric           auto *UV = dyn_cast_or_null<Instruction>(Op->getUnderlyingValue());
11285f757f3fSDimitry Andric           if (UV && MinBWs.contains(UV) && !ProcessedTruncs.contains(Op) &&
11295f757f3fSDimitry Andric               all_of(Op->users(), [](VPUser *U) {
11305f757f3fSDimitry Andric                 return !isa<VPWidenRecipe, VPWidenSelectRecipe>(U);
11315f757f3fSDimitry Andric               })) {
11325f757f3fSDimitry Andric             // Add an entry to ProcessedTruncs to avoid counting the same
11335f757f3fSDimitry Andric             // operand multiple times.
11345f757f3fSDimitry Andric             ProcessedTruncs[Op] = nullptr;
11355f757f3fSDimitry Andric             NumProcessedRecipes += 1;
11365f757f3fSDimitry Andric           }
11375f757f3fSDimitry Andric         }
11385f757f3fSDimitry Andric #endif
11395f757f3fSDimitry Andric         continue;
11405f757f3fSDimitry Andric       }
11415f757f3fSDimitry Andric 
11425f757f3fSDimitry Andric       Type *OldResTy = TypeInfo.inferScalarType(ResultVPV);
11435f757f3fSDimitry Andric       unsigned OldResSizeInBits = OldResTy->getScalarSizeInBits();
11445f757f3fSDimitry Andric       assert(OldResTy->isIntegerTy() && "only integer types supported");
11455f757f3fSDimitry Andric       (void)OldResSizeInBits;
11465f757f3fSDimitry Andric 
11475f757f3fSDimitry Andric       auto *NewResTy = IntegerType::get(Ctx, NewResSizeInBits);
11485f757f3fSDimitry Andric 
11497a6dacacSDimitry Andric       // Any wrapping introduced by shrinking this operation shouldn't be
11507a6dacacSDimitry Andric       // considered undefined behavior. So, we can't unconditionally copy
11517a6dacacSDimitry Andric       // arithmetic wrapping flags to VPW.
11527a6dacacSDimitry Andric       if (auto *VPW = dyn_cast<VPRecipeWithIRFlags>(&R))
11537a6dacacSDimitry Andric         VPW->dropPoisonGeneratingFlags();
11547a6dacacSDimitry Andric 
1155*0fca6ea1SDimitry Andric       using namespace llvm::VPlanPatternMatch;
1156*0fca6ea1SDimitry Andric       if (OldResSizeInBits != NewResSizeInBits &&
1157*0fca6ea1SDimitry Andric           !match(&R, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue()))) {
11587a6dacacSDimitry Andric         // Extend result to original width.
1159*0fca6ea1SDimitry Andric         auto *Ext =
1160*0fca6ea1SDimitry Andric             new VPWidenCastRecipe(Instruction::ZExt, ResultVPV, OldResTy);
11617a6dacacSDimitry Andric         Ext->insertAfter(&R);
11627a6dacacSDimitry Andric         ResultVPV->replaceAllUsesWith(Ext);
11637a6dacacSDimitry Andric         Ext->setOperand(0, ResultVPV);
1164*0fca6ea1SDimitry Andric         assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
1165*0fca6ea1SDimitry Andric       } else
1166*0fca6ea1SDimitry Andric         assert(
1167*0fca6ea1SDimitry Andric             match(&R, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue())) &&
1168*0fca6ea1SDimitry Andric             "Only ICmps should not need extending the result.");
11697a6dacacSDimitry Andric 
1170*0fca6ea1SDimitry Andric       assert(!isa<VPWidenStoreRecipe>(&R) && "stores cannot be narrowed");
1171*0fca6ea1SDimitry Andric       if (isa<VPWidenLoadRecipe>(&R))
11727a6dacacSDimitry Andric         continue;
11737a6dacacSDimitry Andric 
11745f757f3fSDimitry Andric       // Shrink operands by introducing truncates as needed.
11755f757f3fSDimitry Andric       unsigned StartIdx = isa<VPWidenSelectRecipe>(&R) ? 1 : 0;
11765f757f3fSDimitry Andric       for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
11775f757f3fSDimitry Andric         auto *Op = R.getOperand(Idx);
11785f757f3fSDimitry Andric         unsigned OpSizeInBits =
11795f757f3fSDimitry Andric             TypeInfo.inferScalarType(Op)->getScalarSizeInBits();
11805f757f3fSDimitry Andric         if (OpSizeInBits == NewResSizeInBits)
11815f757f3fSDimitry Andric           continue;
11825f757f3fSDimitry Andric         assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
11835f757f3fSDimitry Andric         auto [ProcessedIter, IterIsEmpty] =
11845f757f3fSDimitry Andric             ProcessedTruncs.insert({Op, nullptr});
11855f757f3fSDimitry Andric         VPWidenCastRecipe *NewOp =
11865f757f3fSDimitry Andric             IterIsEmpty
11875f757f3fSDimitry Andric                 ? new VPWidenCastRecipe(Instruction::Trunc, Op, NewResTy)
11885f757f3fSDimitry Andric                 : ProcessedIter->second;
11895f757f3fSDimitry Andric         R.setOperand(Idx, NewOp);
11905f757f3fSDimitry Andric         if (!IterIsEmpty)
11915f757f3fSDimitry Andric           continue;
11925f757f3fSDimitry Andric         ProcessedIter->second = NewOp;
11935f757f3fSDimitry Andric         if (!Op->isLiveIn()) {
11945f757f3fSDimitry Andric           NewOp->insertBefore(&R);
11955f757f3fSDimitry Andric         } else {
11965f757f3fSDimitry Andric           PH->appendRecipe(NewOp);
11975f757f3fSDimitry Andric #ifndef NDEBUG
11985f757f3fSDimitry Andric           auto *OpInst = dyn_cast<Instruction>(Op->getLiveInIRValue());
11995f757f3fSDimitry Andric           bool IsContained = MinBWs.contains(OpInst);
12005f757f3fSDimitry Andric           NumProcessedRecipes += IsContained;
12015f757f3fSDimitry Andric #endif
12025f757f3fSDimitry Andric         }
12035f757f3fSDimitry Andric       }
12045f757f3fSDimitry Andric 
12055f757f3fSDimitry Andric     }
12065f757f3fSDimitry Andric   }
12075f757f3fSDimitry Andric 
12085f757f3fSDimitry Andric   assert(MinBWs.size() == NumProcessedRecipes &&
12095f757f3fSDimitry Andric          "some entries in MinBWs haven't been processed");
12105f757f3fSDimitry Andric }
12115f757f3fSDimitry Andric 
12125f757f3fSDimitry Andric void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) {
12135f757f3fSDimitry Andric   removeRedundantCanonicalIVs(Plan);
12145f757f3fSDimitry Andric   removeRedundantInductionCasts(Plan);
12155f757f3fSDimitry Andric 
12165f757f3fSDimitry Andric   simplifyRecipes(Plan, SE.getContext());
1217*0fca6ea1SDimitry Andric   legalizeAndOptimizeInductions(Plan, SE);
12185f757f3fSDimitry Andric   removeDeadRecipes(Plan);
12195f757f3fSDimitry Andric 
12205f757f3fSDimitry Andric   createAndOptimizeReplicateRegions(Plan);
12215f757f3fSDimitry Andric 
12225f757f3fSDimitry Andric   removeRedundantExpandSCEVRecipes(Plan);
12235f757f3fSDimitry Andric   mergeBlocksIntoPredecessors(Plan);
12245f757f3fSDimitry Andric }
12255f757f3fSDimitry Andric 
12265f757f3fSDimitry Andric // Add a VPActiveLaneMaskPHIRecipe and related recipes to \p Plan and replace
12275f757f3fSDimitry Andric // the loop terminator with a branch-on-cond recipe with the negated
12285f757f3fSDimitry Andric // active-lane-mask as operand. Note that this turns the loop into an
12295f757f3fSDimitry Andric // uncountable one. Only the existing terminator is replaced, all other existing
12305f757f3fSDimitry Andric // recipes/users remain unchanged, except for poison-generating flags being
12315f757f3fSDimitry Andric // dropped from the canonical IV increment. Return the created
12325f757f3fSDimitry Andric // VPActiveLaneMaskPHIRecipe.
12335f757f3fSDimitry Andric //
12345f757f3fSDimitry Andric // The function uses the following definitions:
12355f757f3fSDimitry Andric //
12365f757f3fSDimitry Andric //  %TripCount = DataWithControlFlowWithoutRuntimeCheck ?
12375f757f3fSDimitry Andric //    calculate-trip-count-minus-VF (original TC) : original TC
12385f757f3fSDimitry Andric //  %IncrementValue = DataWithControlFlowWithoutRuntimeCheck ?
12395f757f3fSDimitry Andric //     CanonicalIVPhi : CanonicalIVIncrement
12405f757f3fSDimitry Andric //  %StartV is the canonical induction start value.
12415f757f3fSDimitry Andric //
12425f757f3fSDimitry Andric // The function adds the following recipes:
12435f757f3fSDimitry Andric //
12445f757f3fSDimitry Andric // vector.ph:
12455f757f3fSDimitry Andric //   %TripCount = calculate-trip-count-minus-VF (original TC)
12465f757f3fSDimitry Andric //       [if DataWithControlFlowWithoutRuntimeCheck]
12475f757f3fSDimitry Andric //   %EntryInc = canonical-iv-increment-for-part %StartV
12485f757f3fSDimitry Andric //   %EntryALM = active-lane-mask %EntryInc, %TripCount
12495f757f3fSDimitry Andric //
12505f757f3fSDimitry Andric // vector.body:
12515f757f3fSDimitry Andric //   ...
12525f757f3fSDimitry Andric //   %P = active-lane-mask-phi [ %EntryALM, %vector.ph ], [ %ALM, %vector.body ]
12535f757f3fSDimitry Andric //   ...
12545f757f3fSDimitry Andric //   %InLoopInc = canonical-iv-increment-for-part %IncrementValue
12555f757f3fSDimitry Andric //   %ALM = active-lane-mask %InLoopInc, TripCount
12565f757f3fSDimitry Andric //   %Negated = Not %ALM
12575f757f3fSDimitry Andric //   branch-on-cond %Negated
12585f757f3fSDimitry Andric //
12595f757f3fSDimitry Andric static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
12605f757f3fSDimitry Andric     VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) {
12615f757f3fSDimitry Andric   VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
12625f757f3fSDimitry Andric   VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
12635f757f3fSDimitry Andric   auto *CanonicalIVPHI = Plan.getCanonicalIV();
12645f757f3fSDimitry Andric   VPValue *StartV = CanonicalIVPHI->getStartValue();
12655f757f3fSDimitry Andric 
12665f757f3fSDimitry Andric   auto *CanonicalIVIncrement =
12675f757f3fSDimitry Andric       cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
12685f757f3fSDimitry Andric   // TODO: Check if dropping the flags is needed if
12695f757f3fSDimitry Andric   // !DataAndControlFlowWithoutRuntimeCheck.
12705f757f3fSDimitry Andric   CanonicalIVIncrement->dropPoisonGeneratingFlags();
12715f757f3fSDimitry Andric   DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
12725f757f3fSDimitry Andric   // We can't use StartV directly in the ActiveLaneMask VPInstruction, since
12735f757f3fSDimitry Andric   // we have to take unrolling into account. Each part needs to start at
12745f757f3fSDimitry Andric   //   Part * VF
12755f757f3fSDimitry Andric   auto *VecPreheader = cast<VPBasicBlock>(TopRegion->getSinglePredecessor());
12765f757f3fSDimitry Andric   VPBuilder Builder(VecPreheader);
12775f757f3fSDimitry Andric 
12785f757f3fSDimitry Andric   // Create the ActiveLaneMask instruction using the correct start values.
12795f757f3fSDimitry Andric   VPValue *TC = Plan.getTripCount();
12805f757f3fSDimitry Andric 
12815f757f3fSDimitry Andric   VPValue *TripCount, *IncrementValue;
12825f757f3fSDimitry Andric   if (!DataAndControlFlowWithoutRuntimeCheck) {
12835f757f3fSDimitry Andric     // When the loop is guarded by a runtime overflow check for the loop
12845f757f3fSDimitry Andric     // induction variable increment by VF, we can increment the value before
12855f757f3fSDimitry Andric     // the get.active.lane mask and use the unmodified tripcount.
12865f757f3fSDimitry Andric     IncrementValue = CanonicalIVIncrement;
12875f757f3fSDimitry Andric     TripCount = TC;
12885f757f3fSDimitry Andric   } else {
12895f757f3fSDimitry Andric     // When avoiding a runtime check, the active.lane.mask inside the loop
12905f757f3fSDimitry Andric     // uses a modified trip count and the induction variable increment is
12915f757f3fSDimitry Andric     // done after the active.lane.mask intrinsic is called.
12925f757f3fSDimitry Andric     IncrementValue = CanonicalIVPHI;
12935f757f3fSDimitry Andric     TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF,
12945f757f3fSDimitry Andric                                      {TC}, DL);
12955f757f3fSDimitry Andric   }
12965f757f3fSDimitry Andric   auto *EntryIncrement = Builder.createOverflowingOp(
12975f757f3fSDimitry Andric       VPInstruction::CanonicalIVIncrementForPart, {StartV}, {false, false}, DL,
12985f757f3fSDimitry Andric       "index.part.next");
12995f757f3fSDimitry Andric 
13005f757f3fSDimitry Andric   // Create the active lane mask instruction in the VPlan preheader.
13015f757f3fSDimitry Andric   auto *EntryALM =
13025f757f3fSDimitry Andric       Builder.createNaryOp(VPInstruction::ActiveLaneMask, {EntryIncrement, TC},
13035f757f3fSDimitry Andric                            DL, "active.lane.mask.entry");
13045f757f3fSDimitry Andric 
13055f757f3fSDimitry Andric   // Now create the ActiveLaneMaskPhi recipe in the main loop using the
13065f757f3fSDimitry Andric   // preheader ActiveLaneMask instruction.
13075f757f3fSDimitry Andric   auto LaneMaskPhi = new VPActiveLaneMaskPHIRecipe(EntryALM, DebugLoc());
13085f757f3fSDimitry Andric   LaneMaskPhi->insertAfter(CanonicalIVPHI);
13095f757f3fSDimitry Andric 
13105f757f3fSDimitry Andric   // Create the active lane mask for the next iteration of the loop before the
13115f757f3fSDimitry Andric   // original terminator.
13125f757f3fSDimitry Andric   VPRecipeBase *OriginalTerminator = EB->getTerminator();
13135f757f3fSDimitry Andric   Builder.setInsertPoint(OriginalTerminator);
13145f757f3fSDimitry Andric   auto *InLoopIncrement =
13155f757f3fSDimitry Andric       Builder.createOverflowingOp(VPInstruction::CanonicalIVIncrementForPart,
13165f757f3fSDimitry Andric                                   {IncrementValue}, {false, false}, DL);
13175f757f3fSDimitry Andric   auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
13185f757f3fSDimitry Andric                                    {InLoopIncrement, TripCount}, DL,
13195f757f3fSDimitry Andric                                    "active.lane.mask.next");
13205f757f3fSDimitry Andric   LaneMaskPhi->addOperand(ALM);
13215f757f3fSDimitry Andric 
13225f757f3fSDimitry Andric   // Replace the original terminator with BranchOnCond. We have to invert the
13235f757f3fSDimitry Andric   // mask here because a true condition means jumping to the exit block.
13245f757f3fSDimitry Andric   auto *NotMask = Builder.createNot(ALM, DL);
13255f757f3fSDimitry Andric   Builder.createNaryOp(VPInstruction::BranchOnCond, {NotMask}, DL);
13265f757f3fSDimitry Andric   OriginalTerminator->eraseFromParent();
13275f757f3fSDimitry Andric   return LaneMaskPhi;
13285f757f3fSDimitry Andric }
13295f757f3fSDimitry Andric 
1330*0fca6ea1SDimitry Andric /// Collect all VPValues representing a header mask through the (ICMP_ULE,
1331*0fca6ea1SDimitry Andric /// WideCanonicalIV, backedge-taken-count) pattern.
1332*0fca6ea1SDimitry Andric /// TODO: Introduce explicit recipe for header-mask instead of searching
1333*0fca6ea1SDimitry Andric /// for the header-mask pattern manually.
1334*0fca6ea1SDimitry Andric static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
1335*0fca6ea1SDimitry Andric   SmallVector<VPValue *> WideCanonicalIVs;
1336*0fca6ea1SDimitry Andric   auto *FoundWidenCanonicalIVUser =
1337*0fca6ea1SDimitry Andric       find_if(Plan.getCanonicalIV()->users(),
1338*0fca6ea1SDimitry Andric               [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
1339*0fca6ea1SDimitry Andric   assert(count_if(Plan.getCanonicalIV()->users(),
1340*0fca6ea1SDimitry Andric                   [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); }) <=
1341*0fca6ea1SDimitry Andric              1 &&
1342*0fca6ea1SDimitry Andric          "Must have at most one VPWideCanonicalIVRecipe");
1343*0fca6ea1SDimitry Andric   if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) {
1344*0fca6ea1SDimitry Andric     auto *WideCanonicalIV =
1345*0fca6ea1SDimitry Andric         cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
1346*0fca6ea1SDimitry Andric     WideCanonicalIVs.push_back(WideCanonicalIV);
1347*0fca6ea1SDimitry Andric   }
1348*0fca6ea1SDimitry Andric 
1349*0fca6ea1SDimitry Andric   // Also include VPWidenIntOrFpInductionRecipes that represent a widened
1350*0fca6ea1SDimitry Andric   // version of the canonical induction.
1351*0fca6ea1SDimitry Andric   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
1352*0fca6ea1SDimitry Andric   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
1353*0fca6ea1SDimitry Andric     auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
1354*0fca6ea1SDimitry Andric     if (WidenOriginalIV && WidenOriginalIV->isCanonical())
1355*0fca6ea1SDimitry Andric       WideCanonicalIVs.push_back(WidenOriginalIV);
1356*0fca6ea1SDimitry Andric   }
1357*0fca6ea1SDimitry Andric 
1358*0fca6ea1SDimitry Andric   // Walk users of wide canonical IVs and collect to all compares of the form
1359*0fca6ea1SDimitry Andric   // (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
1360*0fca6ea1SDimitry Andric   SmallVector<VPValue *> HeaderMasks;
1361*0fca6ea1SDimitry Andric   for (auto *Wide : WideCanonicalIVs) {
1362*0fca6ea1SDimitry Andric     for (VPUser *U : SmallVector<VPUser *>(Wide->users())) {
1363*0fca6ea1SDimitry Andric       auto *HeaderMask = dyn_cast<VPInstruction>(U);
1364*0fca6ea1SDimitry Andric       if (!HeaderMask || !vputils::isHeaderMask(HeaderMask, Plan))
1365*0fca6ea1SDimitry Andric         continue;
1366*0fca6ea1SDimitry Andric 
1367*0fca6ea1SDimitry Andric       assert(HeaderMask->getOperand(0) == Wide &&
1368*0fca6ea1SDimitry Andric              "WidenCanonicalIV must be the first operand of the compare");
1369*0fca6ea1SDimitry Andric       HeaderMasks.push_back(HeaderMask);
1370*0fca6ea1SDimitry Andric     }
1371*0fca6ea1SDimitry Andric   }
1372*0fca6ea1SDimitry Andric   return HeaderMasks;
1373*0fca6ea1SDimitry Andric }
1374*0fca6ea1SDimitry Andric 
13755f757f3fSDimitry Andric void VPlanTransforms::addActiveLaneMask(
13765f757f3fSDimitry Andric     VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
13775f757f3fSDimitry Andric     bool DataAndControlFlowWithoutRuntimeCheck) {
13785f757f3fSDimitry Andric   assert((!DataAndControlFlowWithoutRuntimeCheck ||
13795f757f3fSDimitry Andric           UseActiveLaneMaskForControlFlow) &&
13805f757f3fSDimitry Andric          "DataAndControlFlowWithoutRuntimeCheck implies "
13815f757f3fSDimitry Andric          "UseActiveLaneMaskForControlFlow");
13825f757f3fSDimitry Andric 
13835f757f3fSDimitry Andric   auto FoundWidenCanonicalIVUser =
13845f757f3fSDimitry Andric       find_if(Plan.getCanonicalIV()->users(),
13855f757f3fSDimitry Andric               [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
13865f757f3fSDimitry Andric   assert(FoundWidenCanonicalIVUser &&
13875f757f3fSDimitry Andric          "Must have widened canonical IV when tail folding!");
13885f757f3fSDimitry Andric   auto *WideCanonicalIV =
13895f757f3fSDimitry Andric       cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
13907a6dacacSDimitry Andric   VPSingleDefRecipe *LaneMask;
13915f757f3fSDimitry Andric   if (UseActiveLaneMaskForControlFlow) {
13925f757f3fSDimitry Andric     LaneMask = addVPLaneMaskPhiAndUpdateExitBranch(
13935f757f3fSDimitry Andric         Plan, DataAndControlFlowWithoutRuntimeCheck);
13945f757f3fSDimitry Andric   } else {
1395*0fca6ea1SDimitry Andric     VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
1396*0fca6ea1SDimitry Andric     LaneMask = B.createNaryOp(VPInstruction::ActiveLaneMask,
1397*0fca6ea1SDimitry Andric                               {WideCanonicalIV, Plan.getTripCount()}, nullptr,
1398*0fca6ea1SDimitry Andric                               "active.lane.mask");
13995f757f3fSDimitry Andric   }
14005f757f3fSDimitry Andric 
14015f757f3fSDimitry Andric   // Walk users of WideCanonicalIV and replace all compares of the form
14025f757f3fSDimitry Andric   // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
14035f757f3fSDimitry Andric   // active-lane-mask.
1404*0fca6ea1SDimitry Andric   for (VPValue *HeaderMask : collectAllHeaderMasks(Plan))
1405*0fca6ea1SDimitry Andric     HeaderMask->replaceAllUsesWith(LaneMask);
1406*0fca6ea1SDimitry Andric }
1407*0fca6ea1SDimitry Andric 
1408*0fca6ea1SDimitry Andric /// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
1409*0fca6ea1SDimitry Andric /// replaces all uses except the canonical IV increment of
1410*0fca6ea1SDimitry Andric /// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
1411*0fca6ea1SDimitry Andric /// is used only for loop iterations counting after this transformation.
1412*0fca6ea1SDimitry Andric ///
1413*0fca6ea1SDimitry Andric /// The function uses the following definitions:
1414*0fca6ea1SDimitry Andric ///  %StartV is the canonical induction start value.
1415*0fca6ea1SDimitry Andric ///
1416*0fca6ea1SDimitry Andric /// The function adds the following recipes:
1417*0fca6ea1SDimitry Andric ///
1418*0fca6ea1SDimitry Andric /// vector.ph:
1419*0fca6ea1SDimitry Andric /// ...
1420*0fca6ea1SDimitry Andric ///
1421*0fca6ea1SDimitry Andric /// vector.body:
1422*0fca6ea1SDimitry Andric /// ...
1423*0fca6ea1SDimitry Andric /// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ],
1424*0fca6ea1SDimitry Andric ///                                               [ %NextEVLIV, %vector.body ]
1425*0fca6ea1SDimitry Andric /// %VPEVL = EXPLICIT-VECTOR-LENGTH %EVLPhi, original TC
1426*0fca6ea1SDimitry Andric /// ...
1427*0fca6ea1SDimitry Andric /// %NextEVLIV = add IVSize (cast i32 %VPEVVL to IVSize), %EVLPhi
1428*0fca6ea1SDimitry Andric /// ...
1429*0fca6ea1SDimitry Andric ///
1430*0fca6ea1SDimitry Andric bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
1431*0fca6ea1SDimitry Andric   VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
1432*0fca6ea1SDimitry Andric   // The transform updates all users of inductions to work based on EVL, instead
1433*0fca6ea1SDimitry Andric   // of the VF directly. At the moment, widened inductions cannot be updated, so
1434*0fca6ea1SDimitry Andric   // bail out if the plan contains any.
1435*0fca6ea1SDimitry Andric   bool ContainsWidenInductions = any_of(Header->phis(), [](VPRecipeBase &Phi) {
1436*0fca6ea1SDimitry Andric     return isa<VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe>(
1437*0fca6ea1SDimitry Andric         &Phi);
1438*0fca6ea1SDimitry Andric   });
1439*0fca6ea1SDimitry Andric   // FIXME: Remove this once we can transform (select header_mask, true_value,
1440*0fca6ea1SDimitry Andric   // false_value) into vp.merge.
1441*0fca6ea1SDimitry Andric   bool ContainsOutloopReductions =
1442*0fca6ea1SDimitry Andric       any_of(Header->phis(), [&](VPRecipeBase &Phi) {
1443*0fca6ea1SDimitry Andric         auto *R = dyn_cast<VPReductionPHIRecipe>(&Phi);
1444*0fca6ea1SDimitry Andric         return R && !R->isInLoop();
1445*0fca6ea1SDimitry Andric       });
1446*0fca6ea1SDimitry Andric   if (ContainsWidenInductions || ContainsOutloopReductions)
1447*0fca6ea1SDimitry Andric     return false;
1448*0fca6ea1SDimitry Andric 
1449*0fca6ea1SDimitry Andric   auto *CanonicalIVPHI = Plan.getCanonicalIV();
1450*0fca6ea1SDimitry Andric   VPValue *StartV = CanonicalIVPHI->getStartValue();
1451*0fca6ea1SDimitry Andric 
1452*0fca6ea1SDimitry Andric   // Create the ExplicitVectorLengthPhi recipe in the main loop.
1453*0fca6ea1SDimitry Andric   auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc());
1454*0fca6ea1SDimitry Andric   EVLPhi->insertAfter(CanonicalIVPHI);
1455*0fca6ea1SDimitry Andric   auto *VPEVL = new VPInstruction(VPInstruction::ExplicitVectorLength,
1456*0fca6ea1SDimitry Andric                                   {EVLPhi, Plan.getTripCount()});
1457*0fca6ea1SDimitry Andric   VPEVL->insertBefore(*Header, Header->getFirstNonPhi());
1458*0fca6ea1SDimitry Andric 
1459*0fca6ea1SDimitry Andric   auto *CanonicalIVIncrement =
1460*0fca6ea1SDimitry Andric       cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
1461*0fca6ea1SDimitry Andric   VPSingleDefRecipe *OpVPEVL = VPEVL;
1462*0fca6ea1SDimitry Andric   if (unsigned IVSize = CanonicalIVPHI->getScalarType()->getScalarSizeInBits();
1463*0fca6ea1SDimitry Andric       IVSize != 32) {
1464*0fca6ea1SDimitry Andric     OpVPEVL = new VPScalarCastRecipe(IVSize < 32 ? Instruction::Trunc
1465*0fca6ea1SDimitry Andric                                                  : Instruction::ZExt,
1466*0fca6ea1SDimitry Andric                                      OpVPEVL, CanonicalIVPHI->getScalarType());
1467*0fca6ea1SDimitry Andric     OpVPEVL->insertBefore(CanonicalIVIncrement);
1468*0fca6ea1SDimitry Andric   }
1469*0fca6ea1SDimitry Andric   auto *NextEVLIV =
1470*0fca6ea1SDimitry Andric       new VPInstruction(Instruction::Add, {OpVPEVL, EVLPhi},
1471*0fca6ea1SDimitry Andric                         {CanonicalIVIncrement->hasNoUnsignedWrap(),
1472*0fca6ea1SDimitry Andric                          CanonicalIVIncrement->hasNoSignedWrap()},
1473*0fca6ea1SDimitry Andric                         CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
1474*0fca6ea1SDimitry Andric   NextEVLIV->insertBefore(CanonicalIVIncrement);
1475*0fca6ea1SDimitry Andric   EVLPhi->addOperand(NextEVLIV);
1476*0fca6ea1SDimitry Andric 
1477*0fca6ea1SDimitry Andric   for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
1478*0fca6ea1SDimitry Andric     for (VPUser *U : collectUsersRecursively(HeaderMask)) {
1479*0fca6ea1SDimitry Andric       VPRecipeBase *NewRecipe = nullptr;
1480*0fca6ea1SDimitry Andric       auto *CurRecipe = dyn_cast<VPRecipeBase>(U);
1481*0fca6ea1SDimitry Andric       if (!CurRecipe)
14825f757f3fSDimitry Andric         continue;
14835f757f3fSDimitry Andric 
1484*0fca6ea1SDimitry Andric       auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
1485*0fca6ea1SDimitry Andric         assert(OrigMask && "Unmasked recipe when folding tail");
1486*0fca6ea1SDimitry Andric         return HeaderMask == OrigMask ? nullptr : OrigMask;
1487*0fca6ea1SDimitry Andric       };
1488*0fca6ea1SDimitry Andric       if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(CurRecipe)) {
1489*0fca6ea1SDimitry Andric         VPValue *NewMask = GetNewMask(MemR->getMask());
1490*0fca6ea1SDimitry Andric         if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR))
1491*0fca6ea1SDimitry Andric           NewRecipe = new VPWidenLoadEVLRecipe(L, VPEVL, NewMask);
1492*0fca6ea1SDimitry Andric         else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR))
1493*0fca6ea1SDimitry Andric           NewRecipe = new VPWidenStoreEVLRecipe(S, VPEVL, NewMask);
1494*0fca6ea1SDimitry Andric         else
1495*0fca6ea1SDimitry Andric           llvm_unreachable("unsupported recipe");
1496*0fca6ea1SDimitry Andric       } else if (auto *RedR = dyn_cast<VPReductionRecipe>(CurRecipe)) {
1497*0fca6ea1SDimitry Andric         NewRecipe = new VPReductionEVLRecipe(RedR, VPEVL,
1498*0fca6ea1SDimitry Andric                                              GetNewMask(RedR->getCondOp()));
1499*0fca6ea1SDimitry Andric       }
1500*0fca6ea1SDimitry Andric 
1501*0fca6ea1SDimitry Andric       if (NewRecipe) {
1502*0fca6ea1SDimitry Andric         [[maybe_unused]] unsigned NumDefVal = NewRecipe->getNumDefinedValues();
1503*0fca6ea1SDimitry Andric         assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
1504*0fca6ea1SDimitry Andric                "New recipe must define the same number of values as the "
1505*0fca6ea1SDimitry Andric                "original.");
1506*0fca6ea1SDimitry Andric         assert(
1507*0fca6ea1SDimitry Andric             NumDefVal <= 1 &&
1508*0fca6ea1SDimitry Andric             "Only supports recipes with a single definition or without users.");
1509*0fca6ea1SDimitry Andric         NewRecipe->insertBefore(CurRecipe);
1510*0fca6ea1SDimitry Andric         if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe>(NewRecipe)) {
1511*0fca6ea1SDimitry Andric           VPValue *CurVPV = CurRecipe->getVPSingleValue();
1512*0fca6ea1SDimitry Andric           CurVPV->replaceAllUsesWith(NewRecipe->getVPSingleValue());
1513*0fca6ea1SDimitry Andric         }
1514*0fca6ea1SDimitry Andric         CurRecipe->eraseFromParent();
1515*0fca6ea1SDimitry Andric       }
1516*0fca6ea1SDimitry Andric     }
1517*0fca6ea1SDimitry Andric     recursivelyDeleteDeadRecipes(HeaderMask);
1518*0fca6ea1SDimitry Andric   }
1519*0fca6ea1SDimitry Andric   // Replace all uses of VPCanonicalIVPHIRecipe by
1520*0fca6ea1SDimitry Andric   // VPEVLBasedIVPHIRecipe except for the canonical IV increment.
1521*0fca6ea1SDimitry Andric   CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
1522*0fca6ea1SDimitry Andric   CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
1523*0fca6ea1SDimitry Andric   // TODO: support unroll factor > 1.
1524*0fca6ea1SDimitry Andric   Plan.setUF(1);
1525*0fca6ea1SDimitry Andric   return true;
1526*0fca6ea1SDimitry Andric }
1527*0fca6ea1SDimitry Andric 
1528*0fca6ea1SDimitry Andric void VPlanTransforms::dropPoisonGeneratingRecipes(
1529*0fca6ea1SDimitry Andric     VPlan &Plan, function_ref<bool(BasicBlock *)> BlockNeedsPredication) {
1530*0fca6ea1SDimitry Andric   // Collect recipes in the backward slice of `Root` that may generate a poison
1531*0fca6ea1SDimitry Andric   // value that is used after vectorization.
1532*0fca6ea1SDimitry Andric   SmallPtrSet<VPRecipeBase *, 16> Visited;
1533*0fca6ea1SDimitry Andric   auto collectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
1534*0fca6ea1SDimitry Andric     SmallVector<VPRecipeBase *, 16> Worklist;
1535*0fca6ea1SDimitry Andric     Worklist.push_back(Root);
1536*0fca6ea1SDimitry Andric 
1537*0fca6ea1SDimitry Andric     // Traverse the backward slice of Root through its use-def chain.
1538*0fca6ea1SDimitry Andric     while (!Worklist.empty()) {
1539*0fca6ea1SDimitry Andric       VPRecipeBase *CurRec = Worklist.back();
1540*0fca6ea1SDimitry Andric       Worklist.pop_back();
1541*0fca6ea1SDimitry Andric 
1542*0fca6ea1SDimitry Andric       if (!Visited.insert(CurRec).second)
1543*0fca6ea1SDimitry Andric         continue;
1544*0fca6ea1SDimitry Andric 
1545*0fca6ea1SDimitry Andric       // Prune search if we find another recipe generating a widen memory
1546*0fca6ea1SDimitry Andric       // instruction. Widen memory instructions involved in address computation
1547*0fca6ea1SDimitry Andric       // will lead to gather/scatter instructions, which don't need to be
1548*0fca6ea1SDimitry Andric       // handled.
1549*0fca6ea1SDimitry Andric       if (isa<VPWidenMemoryRecipe>(CurRec) || isa<VPInterleaveRecipe>(CurRec) ||
1550*0fca6ea1SDimitry Andric           isa<VPScalarIVStepsRecipe>(CurRec) || isa<VPHeaderPHIRecipe>(CurRec))
1551*0fca6ea1SDimitry Andric         continue;
1552*0fca6ea1SDimitry Andric 
1553*0fca6ea1SDimitry Andric       // This recipe contributes to the address computation of a widen
1554*0fca6ea1SDimitry Andric       // load/store. If the underlying instruction has poison-generating flags,
1555*0fca6ea1SDimitry Andric       // drop them directly.
1556*0fca6ea1SDimitry Andric       if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(CurRec)) {
1557*0fca6ea1SDimitry Andric         VPValue *A, *B;
1558*0fca6ea1SDimitry Andric         using namespace llvm::VPlanPatternMatch;
1559*0fca6ea1SDimitry Andric         // Dropping disjoint from an OR may yield incorrect results, as some
1560*0fca6ea1SDimitry Andric         // analysis may have converted it to an Add implicitly (e.g. SCEV used
1561*0fca6ea1SDimitry Andric         // for dependence analysis). Instead, replace it with an equivalent Add.
1562*0fca6ea1SDimitry Andric         // This is possible as all users of the disjoint OR only access lanes
1563*0fca6ea1SDimitry Andric         // where the operands are disjoint or poison otherwise.
1564*0fca6ea1SDimitry Andric         if (match(RecWithFlags, m_BinaryOr(m_VPValue(A), m_VPValue(B))) &&
1565*0fca6ea1SDimitry Andric             RecWithFlags->isDisjoint()) {
1566*0fca6ea1SDimitry Andric           VPBuilder Builder(RecWithFlags);
1567*0fca6ea1SDimitry Andric           VPInstruction *New = Builder.createOverflowingOp(
1568*0fca6ea1SDimitry Andric               Instruction::Add, {A, B}, {false, false},
1569*0fca6ea1SDimitry Andric               RecWithFlags->getDebugLoc());
1570*0fca6ea1SDimitry Andric           New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
1571*0fca6ea1SDimitry Andric           RecWithFlags->replaceAllUsesWith(New);
1572*0fca6ea1SDimitry Andric           RecWithFlags->eraseFromParent();
1573*0fca6ea1SDimitry Andric           CurRec = New;
1574*0fca6ea1SDimitry Andric         } else
1575*0fca6ea1SDimitry Andric           RecWithFlags->dropPoisonGeneratingFlags();
1576*0fca6ea1SDimitry Andric       } else {
1577*0fca6ea1SDimitry Andric         Instruction *Instr = dyn_cast_or_null<Instruction>(
1578*0fca6ea1SDimitry Andric             CurRec->getVPSingleValue()->getUnderlyingValue());
1579*0fca6ea1SDimitry Andric         (void)Instr;
1580*0fca6ea1SDimitry Andric         assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
1581*0fca6ea1SDimitry Andric                "found instruction with poison generating flags not covered by "
1582*0fca6ea1SDimitry Andric                "VPRecipeWithIRFlags");
1583*0fca6ea1SDimitry Andric       }
1584*0fca6ea1SDimitry Andric 
1585*0fca6ea1SDimitry Andric       // Add new definitions to the worklist.
1586*0fca6ea1SDimitry Andric       for (VPValue *operand : CurRec->operands())
1587*0fca6ea1SDimitry Andric         if (VPRecipeBase *OpDef = operand->getDefiningRecipe())
1588*0fca6ea1SDimitry Andric           Worklist.push_back(OpDef);
1589*0fca6ea1SDimitry Andric     }
1590*0fca6ea1SDimitry Andric   });
1591*0fca6ea1SDimitry Andric 
1592*0fca6ea1SDimitry Andric   // Traverse all the recipes in the VPlan and collect the poison-generating
1593*0fca6ea1SDimitry Andric   // recipes in the backward slice starting at the address of a VPWidenRecipe or
1594*0fca6ea1SDimitry Andric   // VPInterleaveRecipe.
1595*0fca6ea1SDimitry Andric   auto Iter = vp_depth_first_deep(Plan.getEntry());
1596*0fca6ea1SDimitry Andric   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
1597*0fca6ea1SDimitry Andric     for (VPRecipeBase &Recipe : *VPBB) {
1598*0fca6ea1SDimitry Andric       if (auto *WidenRec = dyn_cast<VPWidenMemoryRecipe>(&Recipe)) {
1599*0fca6ea1SDimitry Andric         Instruction &UnderlyingInstr = WidenRec->getIngredient();
1600*0fca6ea1SDimitry Andric         VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
1601*0fca6ea1SDimitry Andric         if (AddrDef && WidenRec->isConsecutive() &&
1602*0fca6ea1SDimitry Andric             BlockNeedsPredication(UnderlyingInstr.getParent()))
1603*0fca6ea1SDimitry Andric           collectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
1604*0fca6ea1SDimitry Andric       } else if (auto *InterleaveRec = dyn_cast<VPInterleaveRecipe>(&Recipe)) {
1605*0fca6ea1SDimitry Andric         VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
1606*0fca6ea1SDimitry Andric         if (AddrDef) {
1607*0fca6ea1SDimitry Andric           // Check if any member of the interleave group needs predication.
1608*0fca6ea1SDimitry Andric           const InterleaveGroup<Instruction> *InterGroup =
1609*0fca6ea1SDimitry Andric               InterleaveRec->getInterleaveGroup();
1610*0fca6ea1SDimitry Andric           bool NeedPredication = false;
1611*0fca6ea1SDimitry Andric           for (int I = 0, NumMembers = InterGroup->getNumMembers();
1612*0fca6ea1SDimitry Andric                I < NumMembers; ++I) {
1613*0fca6ea1SDimitry Andric             Instruction *Member = InterGroup->getMember(I);
1614*0fca6ea1SDimitry Andric             if (Member)
1615*0fca6ea1SDimitry Andric               NeedPredication |= BlockNeedsPredication(Member->getParent());
1616*0fca6ea1SDimitry Andric           }
1617*0fca6ea1SDimitry Andric 
1618*0fca6ea1SDimitry Andric           if (NeedPredication)
1619*0fca6ea1SDimitry Andric             collectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
1620*0fca6ea1SDimitry Andric         }
1621*0fca6ea1SDimitry Andric       }
1622*0fca6ea1SDimitry Andric     }
16235f757f3fSDimitry Andric   }
16245f757f3fSDimitry Andric }
1625