1 //===- VPRecipeBuilder.h - Helper class to build recipes --------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H 10 #define LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H 11 12 #include "LoopVectorizationPlanner.h" 13 #include "VPlan.h" 14 #include "llvm/ADT/DenseMap.h" 15 #include "llvm/ADT/PointerUnion.h" 16 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 17 #include "llvm/IR/IRBuilder.h" 18 19 namespace llvm { 20 21 class LoopVectorizationLegality; 22 class LoopVectorizationCostModel; 23 class TargetLibraryInfo; 24 class TargetTransformInfo; 25 struct HistogramInfo; 26 27 /// A chain of instructions that form a partial reduction. 28 /// Designed to match: reduction_bin_op (bin_op (extend (A), (extend (B))), 29 /// accumulator). 30 struct PartialReductionChain { 31 PartialReductionChain(Instruction *Reduction, Instruction *ExtendA, 32 Instruction *ExtendB, Instruction *BinOp) 33 : Reduction(Reduction), ExtendA(ExtendA), ExtendB(ExtendB), BinOp(BinOp) { 34 } 35 /// The top-level binary operation that forms the reduction to a scalar 36 /// after the loop body. 37 Instruction *Reduction; 38 /// The extension of each of the inner binary operation's operands. 39 Instruction *ExtendA; 40 Instruction *ExtendB; 41 42 /// The binary operation using the extends that is then reduced. 43 Instruction *BinOp; 44 }; 45 46 /// Helper class to create VPRecipies from IR instructions. 47 class VPRecipeBuilder { 48 /// The VPlan new recipes are added to. 49 VPlan &Plan; 50 51 /// The loop that we evaluate. 52 Loop *OrigLoop; 53 54 /// Target Library Info. 55 const TargetLibraryInfo *TLI; 56 57 // Target Transform Info. 58 const TargetTransformInfo *TTI; 59 60 /// The legality analysis. 61 LoopVectorizationLegality *Legal; 62 63 /// The profitablity analysis. 64 LoopVectorizationCostModel &CM; 65 66 PredicatedScalarEvolution &PSE; 67 68 VPBuilder &Builder; 69 70 /// When we if-convert we need to create edge masks. We have to cache values 71 /// so that we don't end up with exponential recursion/IR. Note that 72 /// if-conversion currently takes place during VPlan-construction, so these 73 /// caches are only used at that stage. 74 using EdgeMaskCacheTy = 75 DenseMap<std::pair<BasicBlock *, BasicBlock *>, VPValue *>; 76 using BlockMaskCacheTy = DenseMap<BasicBlock *, VPValue *>; 77 EdgeMaskCacheTy EdgeMaskCache; 78 BlockMaskCacheTy BlockMaskCache; 79 80 // VPlan construction support: Hold a mapping from ingredients to 81 // their recipe. 82 DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe; 83 84 /// Cross-iteration reduction & first-order recurrence phis for which we need 85 /// to add the incoming value from the backedge after all recipes have been 86 /// created. 87 SmallVector<VPHeaderPHIRecipe *, 4> PhisToFix; 88 89 /// A mapping of partial reduction exit instructions to their scaling factor. 90 DenseMap<const Instruction *, unsigned> ScaledReductionMap; 91 92 /// Check if \p I can be widened at the start of \p Range and possibly 93 /// decrease the range such that the returned value holds for the entire \p 94 /// Range. The function should not be called for memory instructions or calls. 95 bool shouldWiden(Instruction *I, VFRange &Range) const; 96 97 /// Check if the load or store instruction \p I should widened for \p 98 /// Range.Start and potentially masked. Such instructions are handled by a 99 /// recipe that takes an additional VPInstruction for the mask. 100 VPWidenMemoryRecipe *tryToWidenMemory(Instruction *I, 101 ArrayRef<VPValue *> Operands, 102 VFRange &Range); 103 104 /// Check if an induction recipe should be constructed for \p Phi. If so build 105 /// and return it. If not, return null. 106 VPHeaderPHIRecipe *tryToOptimizeInductionPHI(PHINode *Phi, 107 ArrayRef<VPValue *> Operands, 108 VFRange &Range); 109 110 /// Optimize the special case where the operand of \p I is a constant integer 111 /// induction variable. 112 VPWidenIntOrFpInductionRecipe * 113 tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands, 114 VFRange &Range); 115 116 /// Handle non-loop phi nodes. Return a new VPBlendRecipe otherwise. Currently 117 /// all such phi nodes are turned into a sequence of select instructions as 118 /// the vectorizer currently performs full if-conversion. 119 VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands); 120 121 /// Handle call instructions. If \p CI can be widened for \p Range.Start, 122 /// return a new VPWidenCallRecipe or VPWidenIntrinsicRecipe. Range.End may be 123 /// decreased to ensure same decision from \p Range.Start to \p Range.End. 124 VPSingleDefRecipe *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands, 125 VFRange &Range); 126 127 /// Check if \p I has an opcode that can be widened and return a VPWidenRecipe 128 /// if it can. The function should only be called if the cost-model indicates 129 /// that widening should be performed. 130 VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands, 131 VPBasicBlock *VPBB); 132 133 /// Makes Histogram count operations safe for vectorization, by emitting a 134 /// llvm.experimental.vector.histogram.add intrinsic in place of the 135 /// Load + Add|Sub + Store operations that perform the histogram in the 136 /// original scalar loop. 137 VPHistogramRecipe *tryToWidenHistogram(const HistogramInfo *HI, 138 ArrayRef<VPValue *> Operands); 139 140 /// Examines reduction operations to see if the target can use a cheaper 141 /// operation with a wider per-iteration input VF and narrower PHI VF. 142 /// Each element within Chains is a pair with a struct containing reduction 143 /// information and the scaling factor between the number of elements in 144 /// the input and output. 145 /// Recursively calls itself to identify chained scaled reductions. 146 /// Returns true if this invocation added an entry to Chains, otherwise false. 147 /// i.e. returns false in the case that a subcall adds an entry to Chains, 148 /// but the top-level call does not. 149 bool getScaledReductions( 150 Instruction *PHI, Instruction *RdxExitInstr, VFRange &Range, 151 SmallVectorImpl<std::pair<PartialReductionChain, unsigned>> &Chains); 152 153 public: 154 VPRecipeBuilder(VPlan &Plan, Loop *OrigLoop, const TargetLibraryInfo *TLI, 155 const TargetTransformInfo *TTI, 156 LoopVectorizationLegality *Legal, 157 LoopVectorizationCostModel &CM, 158 PredicatedScalarEvolution &PSE, VPBuilder &Builder) 159 : Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal), 160 CM(CM), PSE(PSE), Builder(Builder) {} 161 162 std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) { 163 auto It = ScaledReductionMap.find(ExitInst); 164 return It == ScaledReductionMap.end() ? std::nullopt 165 : std::make_optional(It->second); 166 } 167 168 /// Find all possible partial reductions in the loop and track all of those 169 /// that are valid so recipes can be formed later. 170 void collectScaledReductions(VFRange &Range); 171 172 /// Create and return a widened recipe for \p I if one can be created within 173 /// the given VF \p Range. 174 VPRecipeBase *tryToCreateWidenRecipe(Instruction *Instr, 175 ArrayRef<VPValue *> Operands, 176 VFRange &Range, VPBasicBlock *VPBB); 177 178 /// Create and return a partial reduction recipe for a reduction instruction 179 /// along with binary operation and reduction phi operands. 180 VPRecipeBase *tryToCreatePartialReduction(Instruction *Reduction, 181 ArrayRef<VPValue *> Operands); 182 183 /// Set the recipe created for given ingredient. 184 void setRecipe(Instruction *I, VPRecipeBase *R) { 185 assert(!Ingredient2Recipe.contains(I) && 186 "Cannot reset recipe for instruction."); 187 Ingredient2Recipe[I] = R; 188 } 189 190 /// Create the mask for the vector loop header block. 191 void createHeaderMask(); 192 193 /// A helper function that computes the predicate of the block BB, assuming 194 /// that the header block of the loop is set to True or the loop mask when 195 /// tail folding. 196 void createBlockInMask(BasicBlock *BB); 197 198 /// Returns the *entry* mask for the block \p BB. 199 VPValue *getBlockInMask(BasicBlock *BB) const; 200 201 /// Create an edge mask for every destination of cases and/or default. 202 void createSwitchEdgeMasks(SwitchInst *SI); 203 204 /// A helper function that computes the predicate of the edge between SRC 205 /// and DST. 206 VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst); 207 208 /// A helper that returns the previously computed predicate of the edge 209 /// between SRC and DST. 210 VPValue *getEdgeMask(BasicBlock *Src, BasicBlock *Dst) const; 211 212 /// Return the recipe created for given ingredient. 213 VPRecipeBase *getRecipe(Instruction *I) { 214 assert(Ingredient2Recipe.count(I) && 215 "Recording this ingredients recipe was not requested"); 216 assert(Ingredient2Recipe[I] != nullptr && 217 "Ingredient doesn't have a recipe"); 218 return Ingredient2Recipe[I]; 219 } 220 221 /// Build a VPReplicationRecipe for \p I. If it is predicated, add the mask as 222 /// last operand. Range.End may be decreased to ensure same recipe behavior 223 /// from \p Range.Start to \p Range.End. 224 VPReplicateRecipe *handleReplication(Instruction *I, VFRange &Range); 225 226 /// Add the incoming values from the backedge to reduction & first-order 227 /// recurrence cross-iteration phis. 228 void fixHeaderPhis(); 229 230 /// Returns a range mapping the values of the range \p Operands to their 231 /// corresponding VPValues. 232 iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>> 233 mapToVPValues(User::op_range Operands); 234 235 VPValue *getVPValueOrAddLiveIn(Value *V) { 236 if (auto *I = dyn_cast<Instruction>(V)) { 237 if (auto *R = Ingredient2Recipe.lookup(I)) 238 return R->getVPSingleValue(); 239 } 240 return Plan.getOrAddLiveIn(V); 241 } 242 }; 243 } // end namespace llvm 244 245 #endif // LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H 246