18ec40675SFlorian Hahn //===-- VPlanUnroll.cpp - VPlan unroller ----------------------------------===// 28ec40675SFlorian Hahn // 38ec40675SFlorian Hahn // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 48ec40675SFlorian Hahn // See https://llvm.org/LICENSE.txt for license information. 58ec40675SFlorian Hahn // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 68ec40675SFlorian Hahn // 78ec40675SFlorian Hahn //===----------------------------------------------------------------------===// 88ec40675SFlorian Hahn /// 98ec40675SFlorian Hahn /// \file 108ec40675SFlorian Hahn /// This file implements explicit unrolling for VPlans. 118ec40675SFlorian Hahn /// 128ec40675SFlorian Hahn //===----------------------------------------------------------------------===// 138ec40675SFlorian Hahn 148ec40675SFlorian Hahn #include "VPRecipeBuilder.h" 158ec40675SFlorian Hahn #include "VPlan.h" 168ec40675SFlorian Hahn #include "VPlanAnalysis.h" 178ec40675SFlorian Hahn #include "VPlanCFG.h" 188ec40675SFlorian Hahn #include "VPlanPatternMatch.h" 198ec40675SFlorian Hahn #include "VPlanTransforms.h" 208ec40675SFlorian Hahn #include "VPlanUtils.h" 218ec40675SFlorian Hahn #include "llvm/ADT/PostOrderIterator.h" 228ec40675SFlorian Hahn #include "llvm/ADT/STLExtras.h" 238ec40675SFlorian Hahn #include "llvm/ADT/ScopeExit.h" 248ec40675SFlorian Hahn #include "llvm/Analysis/IVDescriptors.h" 258ec40675SFlorian Hahn #include "llvm/IR/Intrinsics.h" 268ec40675SFlorian Hahn 278ec40675SFlorian Hahn using namespace llvm; 288ec40675SFlorian Hahn 298ec40675SFlorian Hahn namespace { 308ec40675SFlorian Hahn 318ec40675SFlorian Hahn /// Helper to hold state needed for unrolling. It holds the Plan to unroll by 328ec40675SFlorian Hahn /// UF. It also holds copies of VPValues across UF-1 unroll parts to facilitate 338ec40675SFlorian Hahn /// the unrolling transformation, where the original VPValues are retained for 348ec40675SFlorian Hahn /// part zero. 358ec40675SFlorian Hahn class UnrollState { 368ec40675SFlorian Hahn /// Plan to unroll. 378ec40675SFlorian Hahn VPlan &Plan; 388ec40675SFlorian Hahn /// Unroll factor to unroll by. 398ec40675SFlorian Hahn const unsigned UF; 408ec40675SFlorian Hahn /// Analysis for types. 418ec40675SFlorian Hahn VPTypeAnalysis TypeInfo; 428ec40675SFlorian Hahn 438ec40675SFlorian Hahn /// Unrolling may create recipes that should not be unrolled themselves. 448ec40675SFlorian Hahn /// Those are tracked in ToSkip. 458ec40675SFlorian Hahn SmallPtrSet<VPRecipeBase *, 8> ToSkip; 468ec40675SFlorian Hahn 478ec40675SFlorian Hahn // Associate with each VPValue of part 0 its unrolled instances of parts 1, 488ec40675SFlorian Hahn // ..., UF-1. 498ec40675SFlorian Hahn DenseMap<VPValue *, SmallVector<VPValue *>> VPV2Parts; 508ec40675SFlorian Hahn 518ec40675SFlorian Hahn /// Unroll replicate region \p VPR by cloning the region UF - 1 times. 528ec40675SFlorian Hahn void unrollReplicateRegionByUF(VPRegionBlock *VPR); 538ec40675SFlorian Hahn 548ec40675SFlorian Hahn /// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across 558ec40675SFlorian Hahn /// all parts. 568ec40675SFlorian Hahn void unrollRecipeByUF(VPRecipeBase &R); 578ec40675SFlorian Hahn 588ec40675SFlorian Hahn /// Unroll header phi recipe \p R. How exactly the recipe gets unrolled 598ec40675SFlorian Hahn /// depends on the concrete header phi. Inserts newly created recipes at \p 608ec40675SFlorian Hahn /// InsertPtForPhi. 618ec40675SFlorian Hahn void unrollHeaderPHIByUF(VPHeaderPHIRecipe *R, 628ec40675SFlorian Hahn VPBasicBlock::iterator InsertPtForPhi); 638ec40675SFlorian Hahn 648ec40675SFlorian Hahn /// Unroll a widen induction recipe \p IV. This introduces recipes to compute 658ec40675SFlorian Hahn /// the induction steps for each part. 668ec40675SFlorian Hahn void unrollWidenInductionByUF(VPWidenIntOrFpInductionRecipe *IV, 678ec40675SFlorian Hahn VPBasicBlock::iterator InsertPtForPhi); 688ec40675SFlorian Hahn 698ec40675SFlorian Hahn VPValue *getConstantVPV(unsigned Part) { 708ec40675SFlorian Hahn Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType(); 718ec40675SFlorian Hahn return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part)); 728ec40675SFlorian Hahn } 738ec40675SFlorian Hahn 748ec40675SFlorian Hahn public: 758ec40675SFlorian Hahn UnrollState(VPlan &Plan, unsigned UF, LLVMContext &Ctx) 768ec40675SFlorian Hahn : Plan(Plan), UF(UF), TypeInfo(Plan.getCanonicalIV()->getScalarType()) {} 778ec40675SFlorian Hahn 788ec40675SFlorian Hahn void unrollBlock(VPBlockBase *VPB); 798ec40675SFlorian Hahn 808ec40675SFlorian Hahn VPValue *getValueForPart(VPValue *V, unsigned Part) { 818ec40675SFlorian Hahn if (Part == 0 || V->isLiveIn()) 828ec40675SFlorian Hahn return V; 838ec40675SFlorian Hahn assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) && 848ec40675SFlorian Hahn "accessed value does not exist"); 858ec40675SFlorian Hahn return VPV2Parts[V][Part - 1]; 868ec40675SFlorian Hahn } 878ec40675SFlorian Hahn 888ec40675SFlorian Hahn /// Given a single original recipe \p OrigR (of part zero), and its copy \p 898ec40675SFlorian Hahn /// CopyR for part \p Part, map every VPValue defined by \p OrigR to its 908ec40675SFlorian Hahn /// corresponding VPValue defined by \p CopyR. 918ec40675SFlorian Hahn void addRecipeForPart(VPRecipeBase *OrigR, VPRecipeBase *CopyR, 928ec40675SFlorian Hahn unsigned Part) { 938ec40675SFlorian Hahn for (const auto &[Idx, VPV] : enumerate(OrigR->definedValues())) { 948ec40675SFlorian Hahn auto Ins = VPV2Parts.insert({VPV, {}}); 958ec40675SFlorian Hahn assert(Ins.first->second.size() == Part - 1 && "earlier parts not set"); 968ec40675SFlorian Hahn Ins.first->second.push_back(CopyR->getVPValue(Idx)); 978ec40675SFlorian Hahn } 988ec40675SFlorian Hahn } 998ec40675SFlorian Hahn 1008ec40675SFlorian Hahn /// Given a uniform recipe \p R, add it for all parts. 1018ec40675SFlorian Hahn void addUniformForAllParts(VPSingleDefRecipe *R) { 1028ec40675SFlorian Hahn auto Ins = VPV2Parts.insert({R, {}}); 1038ec40675SFlorian Hahn assert(Ins.second && "uniform value already added"); 1048ec40675SFlorian Hahn for (unsigned Part = 0; Part != UF; ++Part) 1058ec40675SFlorian Hahn Ins.first->second.push_back(R); 1068ec40675SFlorian Hahn } 1078ec40675SFlorian Hahn 1088ec40675SFlorian Hahn bool contains(VPValue *VPV) const { return VPV2Parts.contains(VPV); } 1098ec40675SFlorian Hahn 1108ec40675SFlorian Hahn /// Update \p R's operand at \p OpIdx with its corresponding VPValue for part 1118ec40675SFlorian Hahn /// \p P. 1128ec40675SFlorian Hahn void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) { 1138ec40675SFlorian Hahn auto *Op = R->getOperand(OpIdx); 1148ec40675SFlorian Hahn R->setOperand(OpIdx, getValueForPart(Op, Part)); 1158ec40675SFlorian Hahn } 1168ec40675SFlorian Hahn 1178ec40675SFlorian Hahn /// Update \p R's operands with their corresponding VPValues for part \p P. 1188ec40675SFlorian Hahn void remapOperands(VPRecipeBase *R, unsigned Part) { 1198ec40675SFlorian Hahn for (const auto &[OpIdx, Op] : enumerate(R->operands())) 1208ec40675SFlorian Hahn R->setOperand(OpIdx, getValueForPart(Op, Part)); 1218ec40675SFlorian Hahn } 1228ec40675SFlorian Hahn }; 1238ec40675SFlorian Hahn } // namespace 1248ec40675SFlorian Hahn 1258ec40675SFlorian Hahn void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) { 1268ec40675SFlorian Hahn VPBlockBase *InsertPt = VPR->getSingleSuccessor(); 1278ec40675SFlorian Hahn for (unsigned Part = 1; Part != UF; ++Part) { 1288ec40675SFlorian Hahn auto *Copy = VPR->clone(); 1298ec40675SFlorian Hahn VPBlockUtils::insertBlockBefore(Copy, InsertPt); 1308ec40675SFlorian Hahn 1318ec40675SFlorian Hahn auto PartI = vp_depth_first_shallow(Copy->getEntry()); 1328ec40675SFlorian Hahn auto Part0 = vp_depth_first_shallow(VPR->getEntry()); 1338ec40675SFlorian Hahn for (const auto &[PartIVPBB, Part0VPBB] : 1348ec40675SFlorian Hahn zip(VPBlockUtils::blocksOnly<VPBasicBlock>(PartI), 1358ec40675SFlorian Hahn VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) { 1368ec40675SFlorian Hahn for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) { 1378ec40675SFlorian Hahn remapOperands(&PartIR, Part); 1388ec40675SFlorian Hahn if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) { 1398ec40675SFlorian Hahn ScalarIVSteps->addOperand(getConstantVPV(Part)); 1408ec40675SFlorian Hahn } 1418ec40675SFlorian Hahn 1428ec40675SFlorian Hahn addRecipeForPart(&Part0R, &PartIR, Part); 1438ec40675SFlorian Hahn } 1448ec40675SFlorian Hahn } 1458ec40675SFlorian Hahn } 1468ec40675SFlorian Hahn } 1478ec40675SFlorian Hahn 1488ec40675SFlorian Hahn void UnrollState::unrollWidenInductionByUF( 1498ec40675SFlorian Hahn VPWidenIntOrFpInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) { 1508ec40675SFlorian Hahn VPBasicBlock *PH = cast<VPBasicBlock>( 1518ec40675SFlorian Hahn IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor()); 1528ec40675SFlorian Hahn Type *IVTy = TypeInfo.inferScalarType(IV); 1538ec40675SFlorian Hahn auto &ID = IV->getInductionDescriptor(); 1548ec40675SFlorian Hahn std::optional<FastMathFlags> FMFs; 1558ec40675SFlorian Hahn if (isa_and_present<FPMathOperator>(ID.getInductionBinOp())) 1568ec40675SFlorian Hahn FMFs = ID.getInductionBinOp()->getFastMathFlags(); 1578ec40675SFlorian Hahn 1588ec40675SFlorian Hahn VPValue *VectorStep = &Plan.getVF(); 1598ec40675SFlorian Hahn VPBuilder Builder(PH); 1608ec40675SFlorian Hahn if (TypeInfo.inferScalarType(VectorStep) != IVTy) { 1618ec40675SFlorian Hahn Instruction::CastOps CastOp = 1628ec40675SFlorian Hahn IVTy->isFloatingPointTy() ? Instruction::UIToFP : Instruction::Trunc; 1638ec40675SFlorian Hahn VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy); 1648ec40675SFlorian Hahn ToSkip.insert(VectorStep->getDefiningRecipe()); 1658ec40675SFlorian Hahn } 1668ec40675SFlorian Hahn 1678ec40675SFlorian Hahn VPValue *ScalarStep = IV->getStepValue(); 1688ec40675SFlorian Hahn auto *ConstStep = ScalarStep->isLiveIn() 1698ec40675SFlorian Hahn ? dyn_cast<ConstantInt>(ScalarStep->getLiveInIRValue()) 1708ec40675SFlorian Hahn : nullptr; 171*4f7f71b7SFlorian Hahn if (!ConstStep || ConstStep->getValue() != 1) { 1728ec40675SFlorian Hahn if (TypeInfo.inferScalarType(ScalarStep) != IVTy) { 1738ec40675SFlorian Hahn ScalarStep = 1748ec40675SFlorian Hahn Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy); 1758ec40675SFlorian Hahn ToSkip.insert(ScalarStep->getDefiningRecipe()); 1768ec40675SFlorian Hahn } 1778ec40675SFlorian Hahn 1788ec40675SFlorian Hahn unsigned MulOpc = 1798ec40675SFlorian Hahn IVTy->isFloatingPointTy() ? Instruction::FMul : Instruction::Mul; 1808ec40675SFlorian Hahn VPInstruction *Mul = Builder.createNaryOp(MulOpc, {VectorStep, ScalarStep}, 1818ec40675SFlorian Hahn FMFs, IV->getDebugLoc()); 1828ec40675SFlorian Hahn VectorStep = Mul; 1838ec40675SFlorian Hahn ToSkip.insert(Mul); 1848ec40675SFlorian Hahn } 1858ec40675SFlorian Hahn 1868ec40675SFlorian Hahn // Now create recipes to compute the induction steps for part 1 .. UF. Part 0 1878ec40675SFlorian Hahn // remains the header phi. Parts > 0 are computed by adding Step to the 1888ec40675SFlorian Hahn // previous part. The header phi recipe will get 2 new operands: the step 1898ec40675SFlorian Hahn // value for a single part and the last part, used to compute the backedge 1908ec40675SFlorian Hahn // value during VPWidenIntOrFpInductionRecipe::execute. %Part.0 = 1918ec40675SFlorian Hahn // VPWidenIntOrFpInductionRecipe %Start, %ScalarStep, %VectorStep, %Part.3 1928ec40675SFlorian Hahn // %Part.1 = %Part.0 + %VectorStep 1938ec40675SFlorian Hahn // %Part.2 = %Part.1 + %VectorStep 1948ec40675SFlorian Hahn // %Part.3 = %Part.2 + %VectorStep 1958ec40675SFlorian Hahn // 1968ec40675SFlorian Hahn // The newly added recipes are added to ToSkip to avoid interleaving them 1978ec40675SFlorian Hahn // again. 1988ec40675SFlorian Hahn VPValue *Prev = IV; 1998ec40675SFlorian Hahn Builder.setInsertPoint(IV->getParent(), InsertPtForPhi); 2008ec40675SFlorian Hahn unsigned AddOpc = 2018ec40675SFlorian Hahn IVTy->isFloatingPointTy() ? ID.getInductionOpcode() : Instruction::Add; 2028ec40675SFlorian Hahn for (unsigned Part = 1; Part != UF; ++Part) { 2038ec40675SFlorian Hahn std::string Name = 2048ec40675SFlorian Hahn Part > 1 ? "step.add." + std::to_string(Part) : "step.add"; 2058ec40675SFlorian Hahn 2068ec40675SFlorian Hahn VPInstruction *Add = Builder.createNaryOp(AddOpc, 2078ec40675SFlorian Hahn { 2088ec40675SFlorian Hahn Prev, 2098ec40675SFlorian Hahn VectorStep, 2108ec40675SFlorian Hahn }, 2118ec40675SFlorian Hahn FMFs, IV->getDebugLoc(), Name); 2128ec40675SFlorian Hahn ToSkip.insert(Add); 2138ec40675SFlorian Hahn addRecipeForPart(IV, Add, Part); 2148ec40675SFlorian Hahn Prev = Add; 2158ec40675SFlorian Hahn } 2168ec40675SFlorian Hahn IV->addOperand(VectorStep); 2178ec40675SFlorian Hahn IV->addOperand(Prev); 2188ec40675SFlorian Hahn } 2198ec40675SFlorian Hahn 2208ec40675SFlorian Hahn void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R, 2218ec40675SFlorian Hahn VPBasicBlock::iterator InsertPtForPhi) { 2228ec40675SFlorian Hahn // First-order recurrences pass a single vector or scalar through their header 2238ec40675SFlorian Hahn // phis, irrespective of interleaving. 2248ec40675SFlorian Hahn if (isa<VPFirstOrderRecurrencePHIRecipe>(R)) 2258ec40675SFlorian Hahn return; 2268ec40675SFlorian Hahn 2278ec40675SFlorian Hahn // Generate step vectors for each unrolled part. 2288ec40675SFlorian Hahn if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(R)) { 2298ec40675SFlorian Hahn unrollWidenInductionByUF(IV, InsertPtForPhi); 2308ec40675SFlorian Hahn return; 2318ec40675SFlorian Hahn } 2328ec40675SFlorian Hahn 2338ec40675SFlorian Hahn auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(R); 2348ec40675SFlorian Hahn if (RdxPhi && RdxPhi->isOrdered()) 2358ec40675SFlorian Hahn return; 2368ec40675SFlorian Hahn 2378ec40675SFlorian Hahn auto InsertPt = std::next(R->getIterator()); 2388ec40675SFlorian Hahn for (unsigned Part = 1; Part != UF; ++Part) { 2398ec40675SFlorian Hahn VPRecipeBase *Copy = R->clone(); 2408ec40675SFlorian Hahn Copy->insertBefore(*R->getParent(), InsertPt); 2418ec40675SFlorian Hahn addRecipeForPart(R, Copy, Part); 2428ec40675SFlorian Hahn if (isa<VPWidenPointerInductionRecipe>(R)) { 2438ec40675SFlorian Hahn Copy->addOperand(R); 2448ec40675SFlorian Hahn Copy->addOperand(getConstantVPV(Part)); 2458ec40675SFlorian Hahn } else if (RdxPhi) { 2468ec40675SFlorian Hahn Copy->addOperand(getConstantVPV(Part)); 2478ec40675SFlorian Hahn } else { 2488ec40675SFlorian Hahn assert(isa<VPActiveLaneMaskPHIRecipe>(R) && 2498ec40675SFlorian Hahn "unexpected header phi recipe not needing unrolled part"); 2508ec40675SFlorian Hahn } 2518ec40675SFlorian Hahn } 2528ec40675SFlorian Hahn } 2538ec40675SFlorian Hahn 2548ec40675SFlorian Hahn /// Handle non-header-phi recipes. 2558ec40675SFlorian Hahn void UnrollState::unrollRecipeByUF(VPRecipeBase &R) { 2568ec40675SFlorian Hahn using namespace llvm::VPlanPatternMatch; 2578ec40675SFlorian Hahn if (match(&R, m_BranchOnCond(m_VPValue())) || 2588ec40675SFlorian Hahn match(&R, m_BranchOnCount(m_VPValue(), m_VPValue()))) 2598ec40675SFlorian Hahn return; 2608ec40675SFlorian Hahn 2618ec40675SFlorian Hahn if (auto *VPI = dyn_cast<VPInstruction>(&R)) { 2628ec40675SFlorian Hahn if (vputils::onlyFirstPartUsed(VPI)) { 2638ec40675SFlorian Hahn addUniformForAllParts(VPI); 2648ec40675SFlorian Hahn return; 2658ec40675SFlorian Hahn } 2668ec40675SFlorian Hahn } 2678ec40675SFlorian Hahn if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) { 2688ec40675SFlorian Hahn if (isa<StoreInst>(RepR->getUnderlyingValue()) && 2698ec40675SFlorian Hahn RepR->getOperand(1)->isDefinedOutsideLoopRegions()) { 2708ec40675SFlorian Hahn // Stores to an invariant address only need to store the last part. 2718ec40675SFlorian Hahn remapOperands(&R, UF - 1); 2728ec40675SFlorian Hahn return; 2738ec40675SFlorian Hahn } 2748ec40675SFlorian Hahn if (auto *II = dyn_cast<IntrinsicInst>(RepR->getUnderlyingValue())) { 2758ec40675SFlorian Hahn if (II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) { 2768ec40675SFlorian Hahn addUniformForAllParts(RepR); 2778ec40675SFlorian Hahn return; 2788ec40675SFlorian Hahn } 2798ec40675SFlorian Hahn } 2808ec40675SFlorian Hahn } 2818ec40675SFlorian Hahn 2828ec40675SFlorian Hahn // Unroll non-uniform recipes. 2838ec40675SFlorian Hahn auto InsertPt = std::next(R.getIterator()); 2848ec40675SFlorian Hahn VPBasicBlock &VPBB = *R.getParent(); 2858ec40675SFlorian Hahn for (unsigned Part = 1; Part != UF; ++Part) { 2868ec40675SFlorian Hahn VPRecipeBase *Copy = R.clone(); 2878ec40675SFlorian Hahn Copy->insertBefore(VPBB, InsertPt); 2888ec40675SFlorian Hahn addRecipeForPart(&R, Copy, Part); 2898ec40675SFlorian Hahn 2908ec40675SFlorian Hahn VPValue *Op; 2918ec40675SFlorian Hahn if (match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>( 2928ec40675SFlorian Hahn m_VPValue(), m_VPValue(Op)))) { 2938ec40675SFlorian Hahn Copy->setOperand(0, getValueForPart(Op, Part - 1)); 2948ec40675SFlorian Hahn Copy->setOperand(1, getValueForPart(Op, Part)); 2958ec40675SFlorian Hahn continue; 2968ec40675SFlorian Hahn } 2978ec40675SFlorian Hahn if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) { 2988ec40675SFlorian Hahn auto *Phi = cast<VPReductionPHIRecipe>(R.getOperand(0)); 2998ec40675SFlorian Hahn if (Phi->isOrdered()) { 3002c0f463bSKazu Hirata auto &Parts = VPV2Parts[Phi]; 3018ec40675SFlorian Hahn if (Part == 1) { 3022c0f463bSKazu Hirata Parts.clear(); 3032c0f463bSKazu Hirata Parts.push_back(Red); 3048ec40675SFlorian Hahn } 3052c0f463bSKazu Hirata Parts.push_back(Copy->getVPSingleValue()); 3068ec40675SFlorian Hahn Phi->setOperand(1, Copy->getVPSingleValue()); 3078ec40675SFlorian Hahn } 3088ec40675SFlorian Hahn } 3098ec40675SFlorian Hahn remapOperands(Copy, Part); 3108ec40675SFlorian Hahn 3118ec40675SFlorian Hahn // Add operand indicating the part to generate code for, to recipes still 3128ec40675SFlorian Hahn // requiring it. 3138ec40675SFlorian Hahn if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe, 314266ff98cSShih-Po Hung VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) || 3158ec40675SFlorian Hahn match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>( 3168ec40675SFlorian Hahn m_VPValue()))) 3178ec40675SFlorian Hahn Copy->addOperand(getConstantVPV(Part)); 3188ec40675SFlorian Hahn 319266ff98cSShih-Po Hung if (isa<VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(R)) 3208ec40675SFlorian Hahn Copy->setOperand(0, R.getOperand(0)); 3218ec40675SFlorian Hahn } 3228ec40675SFlorian Hahn } 3238ec40675SFlorian Hahn 3248ec40675SFlorian Hahn using namespace llvm::VPlanPatternMatch; 3258ec40675SFlorian Hahn void UnrollState::unrollBlock(VPBlockBase *VPB) { 3268ec40675SFlorian Hahn auto *VPR = dyn_cast<VPRegionBlock>(VPB); 3278ec40675SFlorian Hahn if (VPR) { 3288ec40675SFlorian Hahn if (VPR->isReplicator()) 3298ec40675SFlorian Hahn return unrollReplicateRegionByUF(VPR); 3308ec40675SFlorian Hahn 3318ec40675SFlorian Hahn // Traverse blocks in region in RPO to ensure defs are visited before uses 3328ec40675SFlorian Hahn // across blocks. 3338ec40675SFlorian Hahn ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> 3348ec40675SFlorian Hahn RPOT(VPR->getEntry()); 3358ec40675SFlorian Hahn for (VPBlockBase *VPB : RPOT) 3368ec40675SFlorian Hahn unrollBlock(VPB); 3378ec40675SFlorian Hahn return; 3388ec40675SFlorian Hahn } 3398ec40675SFlorian Hahn 3408ec40675SFlorian Hahn // VPB is a VPBasicBlock; unroll it, i.e., unroll its recipes. 3418ec40675SFlorian Hahn auto *VPBB = cast<VPBasicBlock>(VPB); 3428ec40675SFlorian Hahn auto InsertPtForPhi = VPBB->getFirstNonPhi(); 3438ec40675SFlorian Hahn for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { 3448ec40675SFlorian Hahn if (ToSkip.contains(&R) || isa<VPIRInstruction>(&R)) 3458ec40675SFlorian Hahn continue; 3468ec40675SFlorian Hahn 3478ec40675SFlorian Hahn // Add all VPValues for all parts to ComputeReductionResult which combines 3488ec40675SFlorian Hahn // the parts to compute the final reduction value. 3498ec40675SFlorian Hahn VPValue *Op1; 3508ec40675SFlorian Hahn if (match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>( 3518ec40675SFlorian Hahn m_VPValue(), m_VPValue(Op1)))) { 3528ec40675SFlorian Hahn addUniformForAllParts(cast<VPInstruction>(&R)); 3538ec40675SFlorian Hahn for (unsigned Part = 1; Part != UF; ++Part) 3548ec40675SFlorian Hahn R.addOperand(getValueForPart(Op1, Part)); 3558ec40675SFlorian Hahn continue; 3568ec40675SFlorian Hahn } 3578ec40675SFlorian Hahn VPValue *Op0; 3588ec40675SFlorian Hahn if (match(&R, m_VPInstruction<VPInstruction::ExtractFromEnd>( 3598ec40675SFlorian Hahn m_VPValue(Op0), m_VPValue(Op1)))) { 3608ec40675SFlorian Hahn addUniformForAllParts(cast<VPSingleDefRecipe>(&R)); 3618ec40675SFlorian Hahn if (Plan.hasScalarVFOnly()) { 3628ec40675SFlorian Hahn // Extracting from end with VF = 1 implies retrieving the scalar part UF 3638ec40675SFlorian Hahn // - Op1. 3648ec40675SFlorian Hahn unsigned Offset = 3658ec40675SFlorian Hahn cast<ConstantInt>(Op1->getLiveInIRValue())->getZExtValue(); 3668ec40675SFlorian Hahn R.getVPSingleValue()->replaceAllUsesWith( 3678ec40675SFlorian Hahn getValueForPart(Op0, UF - Offset)); 3688ec40675SFlorian Hahn R.eraseFromParent(); 3698ec40675SFlorian Hahn } else { 3708ec40675SFlorian Hahn // Otherwise we extract from the last part. 3718ec40675SFlorian Hahn remapOperands(&R, UF - 1); 3728ec40675SFlorian Hahn } 3738ec40675SFlorian Hahn continue; 3748ec40675SFlorian Hahn } 3758ec40675SFlorian Hahn 3768ec40675SFlorian Hahn auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R); 3778ec40675SFlorian Hahn if (SingleDef && vputils::isUniformAcrossVFsAndUFs(SingleDef)) { 3788ec40675SFlorian Hahn addUniformForAllParts(SingleDef); 3798ec40675SFlorian Hahn continue; 3808ec40675SFlorian Hahn } 3818ec40675SFlorian Hahn 3828ec40675SFlorian Hahn if (auto *H = dyn_cast<VPHeaderPHIRecipe>(&R)) { 3838ec40675SFlorian Hahn unrollHeaderPHIByUF(H, InsertPtForPhi); 3848ec40675SFlorian Hahn continue; 3858ec40675SFlorian Hahn } 3868ec40675SFlorian Hahn 3878ec40675SFlorian Hahn unrollRecipeByUF(R); 3888ec40675SFlorian Hahn } 3898ec40675SFlorian Hahn } 3908ec40675SFlorian Hahn 3918ec40675SFlorian Hahn void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) { 3928ec40675SFlorian Hahn assert(UF > 0 && "Unroll factor must be positive"); 3938ec40675SFlorian Hahn Plan.setUF(UF); 3948ec40675SFlorian Hahn auto Cleanup = make_scope_exit([&Plan]() { 3958ec40675SFlorian Hahn auto Iter = vp_depth_first_deep(Plan.getEntry()); 3968ec40675SFlorian Hahn // Remove recipes that are redundant after unrolling. 3978ec40675SFlorian Hahn for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) { 3988ec40675SFlorian Hahn for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { 3998ec40675SFlorian Hahn auto *VPI = dyn_cast<VPInstruction>(&R); 4008ec40675SFlorian Hahn if (VPI && 4018ec40675SFlorian Hahn VPI->getOpcode() == VPInstruction::CanonicalIVIncrementForPart && 4028ec40675SFlorian Hahn VPI->getNumOperands() == 1) { 4038ec40675SFlorian Hahn VPI->replaceAllUsesWith(VPI->getOperand(0)); 4048ec40675SFlorian Hahn VPI->eraseFromParent(); 4058ec40675SFlorian Hahn } 4068ec40675SFlorian Hahn } 4078ec40675SFlorian Hahn } 4088ec40675SFlorian Hahn }); 4098ec40675SFlorian Hahn if (UF == 1) { 4108ec40675SFlorian Hahn return; 4118ec40675SFlorian Hahn } 4128ec40675SFlorian Hahn 4138ec40675SFlorian Hahn UnrollState Unroller(Plan, UF, Ctx); 4148ec40675SFlorian Hahn 4158ec40675SFlorian Hahn // Iterate over all blocks in the plan starting from Entry, and unroll 4168ec40675SFlorian Hahn // recipes inside them. This includes the vector preheader and middle blocks, 4178ec40675SFlorian Hahn // which may set up or post-process per-part values. 4188ec40675SFlorian Hahn ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT( 4198ec40675SFlorian Hahn Plan.getEntry()); 4208ec40675SFlorian Hahn for (VPBlockBase *VPB : RPOT) 4218ec40675SFlorian Hahn Unroller.unrollBlock(VPB); 4228ec40675SFlorian Hahn 4238ec40675SFlorian Hahn unsigned Part = 1; 4248ec40675SFlorian Hahn // Remap operands of cloned header phis to update backedge values. The header 4258ec40675SFlorian Hahn // phis cloned during unrolling are just after the header phi for part 0. 4268ec40675SFlorian Hahn // Reset Part to 1 when reaching the first (part 0) recipe of a block. 4278ec40675SFlorian Hahn for (VPRecipeBase &H : 4288ec40675SFlorian Hahn Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { 4298ec40675SFlorian Hahn // The second operand of Fixed Order Recurrence phi's, feeding the spliced 4308ec40675SFlorian Hahn // value across the backedge, needs to remap to the last part of the spliced 4318ec40675SFlorian Hahn // value. 4328ec40675SFlorian Hahn if (isa<VPFirstOrderRecurrencePHIRecipe>(&H)) { 4338ec40675SFlorian Hahn Unroller.remapOperand(&H, 1, UF - 1); 4348ec40675SFlorian Hahn continue; 4358ec40675SFlorian Hahn } 4368ec40675SFlorian Hahn if (Unroller.contains(H.getVPSingleValue()) || 4378ec40675SFlorian Hahn isa<VPWidenPointerInductionRecipe>(&H)) { 4388ec40675SFlorian Hahn Part = 1; 4398ec40675SFlorian Hahn continue; 4408ec40675SFlorian Hahn } 4418ec40675SFlorian Hahn Unroller.remapOperands(&H, Part); 4428ec40675SFlorian Hahn Part++; 4438ec40675SFlorian Hahn } 4448ec40675SFlorian Hahn 44553266f73SFlorian Hahn VPlanTransforms::removeDeadRecipes(Plan); 4468ec40675SFlorian Hahn } 447