xref: /llvm-project/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp (revision 6c8f41d3367476d35ac730abf9f980291737193b)
18ec40675SFlorian Hahn //===-- VPlanUnroll.cpp - VPlan unroller ----------------------------------===//
28ec40675SFlorian Hahn //
38ec40675SFlorian Hahn // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
48ec40675SFlorian Hahn // See https://llvm.org/LICENSE.txt for license information.
58ec40675SFlorian Hahn // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
68ec40675SFlorian Hahn //
78ec40675SFlorian Hahn //===----------------------------------------------------------------------===//
88ec40675SFlorian Hahn ///
98ec40675SFlorian Hahn /// \file
108ec40675SFlorian Hahn /// This file implements explicit unrolling for VPlans.
118ec40675SFlorian Hahn ///
128ec40675SFlorian Hahn //===----------------------------------------------------------------------===//
138ec40675SFlorian Hahn 
148ec40675SFlorian Hahn #include "VPRecipeBuilder.h"
158ec40675SFlorian Hahn #include "VPlan.h"
168ec40675SFlorian Hahn #include "VPlanAnalysis.h"
178ec40675SFlorian Hahn #include "VPlanCFG.h"
188ec40675SFlorian Hahn #include "VPlanPatternMatch.h"
198ec40675SFlorian Hahn #include "VPlanTransforms.h"
208ec40675SFlorian Hahn #include "VPlanUtils.h"
218ec40675SFlorian Hahn #include "llvm/ADT/PostOrderIterator.h"
228ec40675SFlorian Hahn #include "llvm/ADT/STLExtras.h"
238ec40675SFlorian Hahn #include "llvm/ADT/ScopeExit.h"
248ec40675SFlorian Hahn #include "llvm/Analysis/IVDescriptors.h"
258ec40675SFlorian Hahn #include "llvm/IR/Intrinsics.h"
268ec40675SFlorian Hahn 
278ec40675SFlorian Hahn using namespace llvm;
288ec40675SFlorian Hahn 
298ec40675SFlorian Hahn namespace {
308ec40675SFlorian Hahn 
318ec40675SFlorian Hahn /// Helper to hold state needed for unrolling. It holds the Plan to unroll by
328ec40675SFlorian Hahn /// UF. It also holds copies of VPValues across UF-1 unroll parts to facilitate
338ec40675SFlorian Hahn /// the unrolling transformation, where the original VPValues are retained for
348ec40675SFlorian Hahn /// part zero.
358ec40675SFlorian Hahn class UnrollState {
368ec40675SFlorian Hahn   /// Plan to unroll.
378ec40675SFlorian Hahn   VPlan &Plan;
388ec40675SFlorian Hahn   /// Unroll factor to unroll by.
398ec40675SFlorian Hahn   const unsigned UF;
408ec40675SFlorian Hahn   /// Analysis for types.
418ec40675SFlorian Hahn   VPTypeAnalysis TypeInfo;
428ec40675SFlorian Hahn 
438ec40675SFlorian Hahn   /// Unrolling may create recipes that should not be unrolled themselves.
448ec40675SFlorian Hahn   /// Those are tracked in ToSkip.
458ec40675SFlorian Hahn   SmallPtrSet<VPRecipeBase *, 8> ToSkip;
468ec40675SFlorian Hahn 
478ec40675SFlorian Hahn   // Associate with each VPValue of part 0 its unrolled instances of parts 1,
488ec40675SFlorian Hahn   // ..., UF-1.
498ec40675SFlorian Hahn   DenseMap<VPValue *, SmallVector<VPValue *>> VPV2Parts;
508ec40675SFlorian Hahn 
518ec40675SFlorian Hahn   /// Unroll replicate region \p VPR by cloning the region UF - 1 times.
528ec40675SFlorian Hahn   void unrollReplicateRegionByUF(VPRegionBlock *VPR);
538ec40675SFlorian Hahn 
548ec40675SFlorian Hahn   /// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across
558ec40675SFlorian Hahn   /// all parts.
568ec40675SFlorian Hahn   void unrollRecipeByUF(VPRecipeBase &R);
578ec40675SFlorian Hahn 
588ec40675SFlorian Hahn   /// Unroll header phi recipe \p R. How exactly the recipe gets unrolled
598ec40675SFlorian Hahn   /// depends on the concrete header phi. Inserts newly created recipes at \p
608ec40675SFlorian Hahn   /// InsertPtForPhi.
618ec40675SFlorian Hahn   void unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
628ec40675SFlorian Hahn                            VPBasicBlock::iterator InsertPtForPhi);
638ec40675SFlorian Hahn 
648ec40675SFlorian Hahn   /// Unroll a widen induction recipe \p IV. This introduces recipes to compute
658ec40675SFlorian Hahn   /// the induction steps for each part.
668ec40675SFlorian Hahn   void unrollWidenInductionByUF(VPWidenIntOrFpInductionRecipe *IV,
678ec40675SFlorian Hahn                                 VPBasicBlock::iterator InsertPtForPhi);
688ec40675SFlorian Hahn 
698ec40675SFlorian Hahn   VPValue *getConstantVPV(unsigned Part) {
708ec40675SFlorian Hahn     Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
718ec40675SFlorian Hahn     return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
728ec40675SFlorian Hahn   }
738ec40675SFlorian Hahn 
748ec40675SFlorian Hahn public:
758ec40675SFlorian Hahn   UnrollState(VPlan &Plan, unsigned UF, LLVMContext &Ctx)
768ec40675SFlorian Hahn       : Plan(Plan), UF(UF), TypeInfo(Plan.getCanonicalIV()->getScalarType()) {}
778ec40675SFlorian Hahn 
788ec40675SFlorian Hahn   void unrollBlock(VPBlockBase *VPB);
798ec40675SFlorian Hahn 
808ec40675SFlorian Hahn   VPValue *getValueForPart(VPValue *V, unsigned Part) {
818ec40675SFlorian Hahn     if (Part == 0 || V->isLiveIn())
828ec40675SFlorian Hahn       return V;
838ec40675SFlorian Hahn     assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) &&
848ec40675SFlorian Hahn            "accessed value does not exist");
858ec40675SFlorian Hahn     return VPV2Parts[V][Part - 1];
868ec40675SFlorian Hahn   }
878ec40675SFlorian Hahn 
888ec40675SFlorian Hahn   /// Given a single original recipe \p OrigR (of part zero), and its copy \p
898ec40675SFlorian Hahn   /// CopyR for part \p Part, map every VPValue defined by \p OrigR to its
908ec40675SFlorian Hahn   /// corresponding VPValue defined by \p CopyR.
918ec40675SFlorian Hahn   void addRecipeForPart(VPRecipeBase *OrigR, VPRecipeBase *CopyR,
928ec40675SFlorian Hahn                         unsigned Part) {
938ec40675SFlorian Hahn     for (const auto &[Idx, VPV] : enumerate(OrigR->definedValues())) {
948ec40675SFlorian Hahn       auto Ins = VPV2Parts.insert({VPV, {}});
958ec40675SFlorian Hahn       assert(Ins.first->second.size() == Part - 1 && "earlier parts not set");
968ec40675SFlorian Hahn       Ins.first->second.push_back(CopyR->getVPValue(Idx));
978ec40675SFlorian Hahn     }
988ec40675SFlorian Hahn   }
998ec40675SFlorian Hahn 
1008ec40675SFlorian Hahn   /// Given a uniform recipe \p R, add it for all parts.
1018ec40675SFlorian Hahn   void addUniformForAllParts(VPSingleDefRecipe *R) {
1028ec40675SFlorian Hahn     auto Ins = VPV2Parts.insert({R, {}});
1038ec40675SFlorian Hahn     assert(Ins.second && "uniform value already added");
1048ec40675SFlorian Hahn     for (unsigned Part = 0; Part != UF; ++Part)
1058ec40675SFlorian Hahn       Ins.first->second.push_back(R);
1068ec40675SFlorian Hahn   }
1078ec40675SFlorian Hahn 
1088ec40675SFlorian Hahn   bool contains(VPValue *VPV) const { return VPV2Parts.contains(VPV); }
1098ec40675SFlorian Hahn 
1108ec40675SFlorian Hahn   /// Update \p R's operand at \p OpIdx with its corresponding VPValue for part
1118ec40675SFlorian Hahn   /// \p P.
1128ec40675SFlorian Hahn   void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) {
1138ec40675SFlorian Hahn     auto *Op = R->getOperand(OpIdx);
1148ec40675SFlorian Hahn     R->setOperand(OpIdx, getValueForPart(Op, Part));
1158ec40675SFlorian Hahn   }
1168ec40675SFlorian Hahn 
1178ec40675SFlorian Hahn   /// Update \p R's operands with their corresponding VPValues for part \p P.
1188ec40675SFlorian Hahn   void remapOperands(VPRecipeBase *R, unsigned Part) {
1198ec40675SFlorian Hahn     for (const auto &[OpIdx, Op] : enumerate(R->operands()))
1208ec40675SFlorian Hahn       R->setOperand(OpIdx, getValueForPart(Op, Part));
1218ec40675SFlorian Hahn   }
1228ec40675SFlorian Hahn };
1238ec40675SFlorian Hahn } // namespace
1248ec40675SFlorian Hahn 
1258ec40675SFlorian Hahn void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
1268ec40675SFlorian Hahn   VPBlockBase *InsertPt = VPR->getSingleSuccessor();
1278ec40675SFlorian Hahn   for (unsigned Part = 1; Part != UF; ++Part) {
1288ec40675SFlorian Hahn     auto *Copy = VPR->clone();
1298ec40675SFlorian Hahn     VPBlockUtils::insertBlockBefore(Copy, InsertPt);
1308ec40675SFlorian Hahn 
1318ec40675SFlorian Hahn     auto PartI = vp_depth_first_shallow(Copy->getEntry());
1328ec40675SFlorian Hahn     auto Part0 = vp_depth_first_shallow(VPR->getEntry());
1338ec40675SFlorian Hahn     for (const auto &[PartIVPBB, Part0VPBB] :
1348ec40675SFlorian Hahn          zip(VPBlockUtils::blocksOnly<VPBasicBlock>(PartI),
1358ec40675SFlorian Hahn              VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
1368ec40675SFlorian Hahn       for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
1378ec40675SFlorian Hahn         remapOperands(&PartIR, Part);
1388ec40675SFlorian Hahn         if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
1398ec40675SFlorian Hahn           ScalarIVSteps->addOperand(getConstantVPV(Part));
1408ec40675SFlorian Hahn         }
1418ec40675SFlorian Hahn 
1428ec40675SFlorian Hahn         addRecipeForPart(&Part0R, &PartIR, Part);
1438ec40675SFlorian Hahn       }
1448ec40675SFlorian Hahn     }
1458ec40675SFlorian Hahn   }
1468ec40675SFlorian Hahn }
1478ec40675SFlorian Hahn 
1488ec40675SFlorian Hahn void UnrollState::unrollWidenInductionByUF(
1498ec40675SFlorian Hahn     VPWidenIntOrFpInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) {
1508ec40675SFlorian Hahn   VPBasicBlock *PH = cast<VPBasicBlock>(
1518ec40675SFlorian Hahn       IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
1528ec40675SFlorian Hahn   Type *IVTy = TypeInfo.inferScalarType(IV);
1538ec40675SFlorian Hahn   auto &ID = IV->getInductionDescriptor();
1548ec40675SFlorian Hahn   std::optional<FastMathFlags> FMFs;
1558ec40675SFlorian Hahn   if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
1568ec40675SFlorian Hahn     FMFs = ID.getInductionBinOp()->getFastMathFlags();
1578ec40675SFlorian Hahn 
1588ec40675SFlorian Hahn   VPValue *VectorStep = &Plan.getVF();
1598ec40675SFlorian Hahn   VPBuilder Builder(PH);
1608ec40675SFlorian Hahn   if (TypeInfo.inferScalarType(VectorStep) != IVTy) {
1618ec40675SFlorian Hahn     Instruction::CastOps CastOp =
1628ec40675SFlorian Hahn         IVTy->isFloatingPointTy() ? Instruction::UIToFP : Instruction::Trunc;
1638ec40675SFlorian Hahn     VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
1648ec40675SFlorian Hahn     ToSkip.insert(VectorStep->getDefiningRecipe());
1658ec40675SFlorian Hahn   }
1668ec40675SFlorian Hahn 
1678ec40675SFlorian Hahn   VPValue *ScalarStep = IV->getStepValue();
1688ec40675SFlorian Hahn   auto *ConstStep = ScalarStep->isLiveIn()
1698ec40675SFlorian Hahn                         ? dyn_cast<ConstantInt>(ScalarStep->getLiveInIRValue())
1708ec40675SFlorian Hahn                         : nullptr;
171*4f7f71b7SFlorian Hahn   if (!ConstStep || ConstStep->getValue() != 1) {
1728ec40675SFlorian Hahn     if (TypeInfo.inferScalarType(ScalarStep) != IVTy) {
1738ec40675SFlorian Hahn       ScalarStep =
1748ec40675SFlorian Hahn           Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
1758ec40675SFlorian Hahn       ToSkip.insert(ScalarStep->getDefiningRecipe());
1768ec40675SFlorian Hahn     }
1778ec40675SFlorian Hahn 
1788ec40675SFlorian Hahn     unsigned MulOpc =
1798ec40675SFlorian Hahn         IVTy->isFloatingPointTy() ? Instruction::FMul : Instruction::Mul;
1808ec40675SFlorian Hahn     VPInstruction *Mul = Builder.createNaryOp(MulOpc, {VectorStep, ScalarStep},
1818ec40675SFlorian Hahn                                               FMFs, IV->getDebugLoc());
1828ec40675SFlorian Hahn     VectorStep = Mul;
1838ec40675SFlorian Hahn     ToSkip.insert(Mul);
1848ec40675SFlorian Hahn   }
1858ec40675SFlorian Hahn 
1868ec40675SFlorian Hahn   // Now create recipes to compute the induction steps for part 1 .. UF. Part 0
1878ec40675SFlorian Hahn   // remains the header phi. Parts > 0 are computed by adding Step to the
1888ec40675SFlorian Hahn   // previous part. The header phi recipe will get 2 new operands: the step
1898ec40675SFlorian Hahn   // value for a single part and the last part, used to compute the backedge
1908ec40675SFlorian Hahn   // value during VPWidenIntOrFpInductionRecipe::execute. %Part.0 =
1918ec40675SFlorian Hahn   // VPWidenIntOrFpInductionRecipe %Start, %ScalarStep, %VectorStep, %Part.3
1928ec40675SFlorian Hahn   // %Part.1 = %Part.0 + %VectorStep
1938ec40675SFlorian Hahn   // %Part.2 = %Part.1 + %VectorStep
1948ec40675SFlorian Hahn   // %Part.3 = %Part.2 + %VectorStep
1958ec40675SFlorian Hahn   //
1968ec40675SFlorian Hahn   // The newly added recipes are added to ToSkip to avoid interleaving them
1978ec40675SFlorian Hahn   // again.
1988ec40675SFlorian Hahn   VPValue *Prev = IV;
1998ec40675SFlorian Hahn   Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);
2008ec40675SFlorian Hahn   unsigned AddOpc =
2018ec40675SFlorian Hahn       IVTy->isFloatingPointTy() ? ID.getInductionOpcode() : Instruction::Add;
2028ec40675SFlorian Hahn   for (unsigned Part = 1; Part != UF; ++Part) {
2038ec40675SFlorian Hahn     std::string Name =
2048ec40675SFlorian Hahn         Part > 1 ? "step.add." + std::to_string(Part) : "step.add";
2058ec40675SFlorian Hahn 
2068ec40675SFlorian Hahn     VPInstruction *Add = Builder.createNaryOp(AddOpc,
2078ec40675SFlorian Hahn                                               {
2088ec40675SFlorian Hahn                                                   Prev,
2098ec40675SFlorian Hahn                                                   VectorStep,
2108ec40675SFlorian Hahn                                               },
2118ec40675SFlorian Hahn                                               FMFs, IV->getDebugLoc(), Name);
2128ec40675SFlorian Hahn     ToSkip.insert(Add);
2138ec40675SFlorian Hahn     addRecipeForPart(IV, Add, Part);
2148ec40675SFlorian Hahn     Prev = Add;
2158ec40675SFlorian Hahn   }
2168ec40675SFlorian Hahn   IV->addOperand(VectorStep);
2178ec40675SFlorian Hahn   IV->addOperand(Prev);
2188ec40675SFlorian Hahn }
2198ec40675SFlorian Hahn 
2208ec40675SFlorian Hahn void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
2218ec40675SFlorian Hahn                                       VPBasicBlock::iterator InsertPtForPhi) {
2228ec40675SFlorian Hahn   // First-order recurrences pass a single vector or scalar through their header
2238ec40675SFlorian Hahn   // phis, irrespective of interleaving.
2248ec40675SFlorian Hahn   if (isa<VPFirstOrderRecurrencePHIRecipe>(R))
2258ec40675SFlorian Hahn     return;
2268ec40675SFlorian Hahn 
2278ec40675SFlorian Hahn   // Generate step vectors for each unrolled part.
2288ec40675SFlorian Hahn   if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(R)) {
2298ec40675SFlorian Hahn     unrollWidenInductionByUF(IV, InsertPtForPhi);
2308ec40675SFlorian Hahn     return;
2318ec40675SFlorian Hahn   }
2328ec40675SFlorian Hahn 
2338ec40675SFlorian Hahn   auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(R);
2348ec40675SFlorian Hahn   if (RdxPhi && RdxPhi->isOrdered())
2358ec40675SFlorian Hahn     return;
2368ec40675SFlorian Hahn 
2378ec40675SFlorian Hahn   auto InsertPt = std::next(R->getIterator());
2388ec40675SFlorian Hahn   for (unsigned Part = 1; Part != UF; ++Part) {
2398ec40675SFlorian Hahn     VPRecipeBase *Copy = R->clone();
2408ec40675SFlorian Hahn     Copy->insertBefore(*R->getParent(), InsertPt);
2418ec40675SFlorian Hahn     addRecipeForPart(R, Copy, Part);
2428ec40675SFlorian Hahn     if (isa<VPWidenPointerInductionRecipe>(R)) {
2438ec40675SFlorian Hahn       Copy->addOperand(R);
2448ec40675SFlorian Hahn       Copy->addOperand(getConstantVPV(Part));
2458ec40675SFlorian Hahn     } else if (RdxPhi) {
2468ec40675SFlorian Hahn       Copy->addOperand(getConstantVPV(Part));
2478ec40675SFlorian Hahn     } else {
2488ec40675SFlorian Hahn       assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&
2498ec40675SFlorian Hahn              "unexpected header phi recipe not needing unrolled part");
2508ec40675SFlorian Hahn     }
2518ec40675SFlorian Hahn   }
2528ec40675SFlorian Hahn }
2538ec40675SFlorian Hahn 
2548ec40675SFlorian Hahn /// Handle non-header-phi recipes.
2558ec40675SFlorian Hahn void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
2568ec40675SFlorian Hahn   using namespace llvm::VPlanPatternMatch;
2578ec40675SFlorian Hahn   if (match(&R, m_BranchOnCond(m_VPValue())) ||
2588ec40675SFlorian Hahn       match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
2598ec40675SFlorian Hahn     return;
2608ec40675SFlorian Hahn 
2618ec40675SFlorian Hahn   if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
2628ec40675SFlorian Hahn     if (vputils::onlyFirstPartUsed(VPI)) {
2638ec40675SFlorian Hahn       addUniformForAllParts(VPI);
2648ec40675SFlorian Hahn       return;
2658ec40675SFlorian Hahn     }
2668ec40675SFlorian Hahn   }
2678ec40675SFlorian Hahn   if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
2688ec40675SFlorian Hahn     if (isa<StoreInst>(RepR->getUnderlyingValue()) &&
2698ec40675SFlorian Hahn         RepR->getOperand(1)->isDefinedOutsideLoopRegions()) {
2708ec40675SFlorian Hahn       // Stores to an invariant address only need to store the last part.
2718ec40675SFlorian Hahn       remapOperands(&R, UF - 1);
2728ec40675SFlorian Hahn       return;
2738ec40675SFlorian Hahn     }
2748ec40675SFlorian Hahn     if (auto *II = dyn_cast<IntrinsicInst>(RepR->getUnderlyingValue())) {
2758ec40675SFlorian Hahn       if (II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) {
2768ec40675SFlorian Hahn         addUniformForAllParts(RepR);
2778ec40675SFlorian Hahn         return;
2788ec40675SFlorian Hahn       }
2798ec40675SFlorian Hahn     }
2808ec40675SFlorian Hahn   }
2818ec40675SFlorian Hahn 
2828ec40675SFlorian Hahn   // Unroll non-uniform recipes.
2838ec40675SFlorian Hahn   auto InsertPt = std::next(R.getIterator());
2848ec40675SFlorian Hahn   VPBasicBlock &VPBB = *R.getParent();
2858ec40675SFlorian Hahn   for (unsigned Part = 1; Part != UF; ++Part) {
2868ec40675SFlorian Hahn     VPRecipeBase *Copy = R.clone();
2878ec40675SFlorian Hahn     Copy->insertBefore(VPBB, InsertPt);
2888ec40675SFlorian Hahn     addRecipeForPart(&R, Copy, Part);
2898ec40675SFlorian Hahn 
2908ec40675SFlorian Hahn     VPValue *Op;
2918ec40675SFlorian Hahn     if (match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
2928ec40675SFlorian Hahn                       m_VPValue(), m_VPValue(Op)))) {
2938ec40675SFlorian Hahn       Copy->setOperand(0, getValueForPart(Op, Part - 1));
2948ec40675SFlorian Hahn       Copy->setOperand(1, getValueForPart(Op, Part));
2958ec40675SFlorian Hahn       continue;
2968ec40675SFlorian Hahn     }
2978ec40675SFlorian Hahn     if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
2988ec40675SFlorian Hahn       auto *Phi = cast<VPReductionPHIRecipe>(R.getOperand(0));
2998ec40675SFlorian Hahn       if (Phi->isOrdered()) {
3002c0f463bSKazu Hirata         auto &Parts = VPV2Parts[Phi];
3018ec40675SFlorian Hahn         if (Part == 1) {
3022c0f463bSKazu Hirata           Parts.clear();
3032c0f463bSKazu Hirata           Parts.push_back(Red);
3048ec40675SFlorian Hahn         }
3052c0f463bSKazu Hirata         Parts.push_back(Copy->getVPSingleValue());
3068ec40675SFlorian Hahn         Phi->setOperand(1, Copy->getVPSingleValue());
3078ec40675SFlorian Hahn       }
3088ec40675SFlorian Hahn     }
3098ec40675SFlorian Hahn     remapOperands(Copy, Part);
3108ec40675SFlorian Hahn 
3118ec40675SFlorian Hahn     // Add operand indicating the part to generate code for, to recipes still
3128ec40675SFlorian Hahn     // requiring it.
3138ec40675SFlorian Hahn     if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
314266ff98cSShih-Po Hung             VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) ||
3158ec40675SFlorian Hahn         match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
3168ec40675SFlorian Hahn                         m_VPValue())))
3178ec40675SFlorian Hahn       Copy->addOperand(getConstantVPV(Part));
3188ec40675SFlorian Hahn 
319266ff98cSShih-Po Hung     if (isa<VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(R))
3208ec40675SFlorian Hahn       Copy->setOperand(0, R.getOperand(0));
3218ec40675SFlorian Hahn   }
3228ec40675SFlorian Hahn }
3238ec40675SFlorian Hahn 
3248ec40675SFlorian Hahn using namespace llvm::VPlanPatternMatch;
3258ec40675SFlorian Hahn void UnrollState::unrollBlock(VPBlockBase *VPB) {
3268ec40675SFlorian Hahn   auto *VPR = dyn_cast<VPRegionBlock>(VPB);
3278ec40675SFlorian Hahn   if (VPR) {
3288ec40675SFlorian Hahn     if (VPR->isReplicator())
3298ec40675SFlorian Hahn       return unrollReplicateRegionByUF(VPR);
3308ec40675SFlorian Hahn 
3318ec40675SFlorian Hahn     // Traverse blocks in region in RPO to ensure defs are visited before uses
3328ec40675SFlorian Hahn     // across blocks.
3338ec40675SFlorian Hahn     ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
3348ec40675SFlorian Hahn         RPOT(VPR->getEntry());
3358ec40675SFlorian Hahn     for (VPBlockBase *VPB : RPOT)
3368ec40675SFlorian Hahn       unrollBlock(VPB);
3378ec40675SFlorian Hahn     return;
3388ec40675SFlorian Hahn   }
3398ec40675SFlorian Hahn 
3408ec40675SFlorian Hahn   // VPB is a VPBasicBlock; unroll it, i.e., unroll its recipes.
3418ec40675SFlorian Hahn   auto *VPBB = cast<VPBasicBlock>(VPB);
3428ec40675SFlorian Hahn   auto InsertPtForPhi = VPBB->getFirstNonPhi();
3438ec40675SFlorian Hahn   for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
3448ec40675SFlorian Hahn     if (ToSkip.contains(&R) || isa<VPIRInstruction>(&R))
3458ec40675SFlorian Hahn       continue;
3468ec40675SFlorian Hahn 
3478ec40675SFlorian Hahn     // Add all VPValues for all parts to ComputeReductionResult which combines
3488ec40675SFlorian Hahn     // the parts to compute the final reduction value.
3498ec40675SFlorian Hahn     VPValue *Op1;
3508ec40675SFlorian Hahn     if (match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
3518ec40675SFlorian Hahn                       m_VPValue(), m_VPValue(Op1)))) {
3528ec40675SFlorian Hahn       addUniformForAllParts(cast<VPInstruction>(&R));
3538ec40675SFlorian Hahn       for (unsigned Part = 1; Part != UF; ++Part)
3548ec40675SFlorian Hahn         R.addOperand(getValueForPart(Op1, Part));
3558ec40675SFlorian Hahn       continue;
3568ec40675SFlorian Hahn     }
3578ec40675SFlorian Hahn     VPValue *Op0;
3588ec40675SFlorian Hahn     if (match(&R, m_VPInstruction<VPInstruction::ExtractFromEnd>(
3598ec40675SFlorian Hahn                       m_VPValue(Op0), m_VPValue(Op1)))) {
3608ec40675SFlorian Hahn       addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
3618ec40675SFlorian Hahn       if (Plan.hasScalarVFOnly()) {
3628ec40675SFlorian Hahn         // Extracting from end with VF = 1 implies retrieving the scalar part UF
3638ec40675SFlorian Hahn         // - Op1.
3648ec40675SFlorian Hahn         unsigned Offset =
3658ec40675SFlorian Hahn             cast<ConstantInt>(Op1->getLiveInIRValue())->getZExtValue();
3668ec40675SFlorian Hahn         R.getVPSingleValue()->replaceAllUsesWith(
3678ec40675SFlorian Hahn             getValueForPart(Op0, UF - Offset));
3688ec40675SFlorian Hahn         R.eraseFromParent();
3698ec40675SFlorian Hahn       } else {
3708ec40675SFlorian Hahn         // Otherwise we extract from the last part.
3718ec40675SFlorian Hahn         remapOperands(&R, UF - 1);
3728ec40675SFlorian Hahn       }
3738ec40675SFlorian Hahn       continue;
3748ec40675SFlorian Hahn     }
3758ec40675SFlorian Hahn 
3768ec40675SFlorian Hahn     auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R);
3778ec40675SFlorian Hahn     if (SingleDef && vputils::isUniformAcrossVFsAndUFs(SingleDef)) {
3788ec40675SFlorian Hahn       addUniformForAllParts(SingleDef);
3798ec40675SFlorian Hahn       continue;
3808ec40675SFlorian Hahn     }
3818ec40675SFlorian Hahn 
3828ec40675SFlorian Hahn     if (auto *H = dyn_cast<VPHeaderPHIRecipe>(&R)) {
3838ec40675SFlorian Hahn       unrollHeaderPHIByUF(H, InsertPtForPhi);
3848ec40675SFlorian Hahn       continue;
3858ec40675SFlorian Hahn     }
3868ec40675SFlorian Hahn 
3878ec40675SFlorian Hahn     unrollRecipeByUF(R);
3888ec40675SFlorian Hahn   }
3898ec40675SFlorian Hahn }
3908ec40675SFlorian Hahn 
3918ec40675SFlorian Hahn void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
3928ec40675SFlorian Hahn   assert(UF > 0 && "Unroll factor must be positive");
3938ec40675SFlorian Hahn   Plan.setUF(UF);
3948ec40675SFlorian Hahn   auto Cleanup = make_scope_exit([&Plan]() {
3958ec40675SFlorian Hahn     auto Iter = vp_depth_first_deep(Plan.getEntry());
3968ec40675SFlorian Hahn     // Remove recipes that are redundant after unrolling.
3978ec40675SFlorian Hahn     for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
3988ec40675SFlorian Hahn       for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
3998ec40675SFlorian Hahn         auto *VPI = dyn_cast<VPInstruction>(&R);
4008ec40675SFlorian Hahn         if (VPI &&
4018ec40675SFlorian Hahn             VPI->getOpcode() == VPInstruction::CanonicalIVIncrementForPart &&
4028ec40675SFlorian Hahn             VPI->getNumOperands() == 1) {
4038ec40675SFlorian Hahn           VPI->replaceAllUsesWith(VPI->getOperand(0));
4048ec40675SFlorian Hahn           VPI->eraseFromParent();
4058ec40675SFlorian Hahn         }
4068ec40675SFlorian Hahn       }
4078ec40675SFlorian Hahn     }
4088ec40675SFlorian Hahn   });
4098ec40675SFlorian Hahn   if (UF == 1) {
4108ec40675SFlorian Hahn     return;
4118ec40675SFlorian Hahn   }
4128ec40675SFlorian Hahn 
4138ec40675SFlorian Hahn   UnrollState Unroller(Plan, UF, Ctx);
4148ec40675SFlorian Hahn 
4158ec40675SFlorian Hahn   // Iterate over all blocks in the plan starting from Entry, and unroll
4168ec40675SFlorian Hahn   // recipes inside them. This includes the vector preheader and middle blocks,
4178ec40675SFlorian Hahn   // which may set up or post-process per-part values.
4188ec40675SFlorian Hahn   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
4198ec40675SFlorian Hahn       Plan.getEntry());
4208ec40675SFlorian Hahn   for (VPBlockBase *VPB : RPOT)
4218ec40675SFlorian Hahn     Unroller.unrollBlock(VPB);
4228ec40675SFlorian Hahn 
4238ec40675SFlorian Hahn   unsigned Part = 1;
4248ec40675SFlorian Hahn   // Remap operands of cloned header phis to update backedge values. The header
4258ec40675SFlorian Hahn   // phis cloned during unrolling are just after the header phi for part 0.
4268ec40675SFlorian Hahn   // Reset Part to 1 when reaching the first (part 0) recipe of a block.
4278ec40675SFlorian Hahn   for (VPRecipeBase &H :
4288ec40675SFlorian Hahn        Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
4298ec40675SFlorian Hahn     // The second operand of Fixed Order Recurrence phi's, feeding the spliced
4308ec40675SFlorian Hahn     // value across the backedge, needs to remap to the last part of the spliced
4318ec40675SFlorian Hahn     // value.
4328ec40675SFlorian Hahn     if (isa<VPFirstOrderRecurrencePHIRecipe>(&H)) {
4338ec40675SFlorian Hahn       Unroller.remapOperand(&H, 1, UF - 1);
4348ec40675SFlorian Hahn       continue;
4358ec40675SFlorian Hahn     }
4368ec40675SFlorian Hahn     if (Unroller.contains(H.getVPSingleValue()) ||
4378ec40675SFlorian Hahn         isa<VPWidenPointerInductionRecipe>(&H)) {
4388ec40675SFlorian Hahn       Part = 1;
4398ec40675SFlorian Hahn       continue;
4408ec40675SFlorian Hahn     }
4418ec40675SFlorian Hahn     Unroller.remapOperands(&H, Part);
4428ec40675SFlorian Hahn     Part++;
4438ec40675SFlorian Hahn   }
4448ec40675SFlorian Hahn 
44553266f73SFlorian Hahn   VPlanTransforms::removeDeadRecipes(Plan);
4468ec40675SFlorian Hahn }
447