xref: /openbsd-src/gnu/llvm/llvm/lib/Transforms/Vectorize/VPlan.h (revision d415bd752c734aee168c4ee86ff32e8cc249eb16)
109467b48Spatrick //===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick /// \file
1009467b48Spatrick /// This file contains the declarations of the Vectorization Plan base classes:
1109467b48Spatrick /// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
1209467b48Spatrick ///    VPBlockBase, together implementing a Hierarchical CFG;
13*d415bd75Srobert /// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
1409467b48Spatrick ///    within VPBasicBlocks;
15*d415bd75Srobert /// 3. VPInstruction, a concrete Recipe and VPUser modeling a single planned
1609467b48Spatrick ///    instruction;
17*d415bd75Srobert /// 4. The VPlan class holding a candidate for vectorization;
18*d415bd75Srobert /// 5. The VPlanPrinter class providing a way to print a plan in dot format;
1909467b48Spatrick /// These are documented in docs/VectorizationPlan.rst.
2009467b48Spatrick //
2109467b48Spatrick //===----------------------------------------------------------------------===//
2209467b48Spatrick 
2309467b48Spatrick #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
2409467b48Spatrick #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
2509467b48Spatrick 
2609467b48Spatrick #include "VPlanValue.h"
2709467b48Spatrick #include "llvm/ADT/DenseMap.h"
2809467b48Spatrick #include "llvm/ADT/DepthFirstIterator.h"
29*d415bd75Srobert #include "llvm/ADT/MapVector.h"
3009467b48Spatrick #include "llvm/ADT/SmallBitVector.h"
3109467b48Spatrick #include "llvm/ADT/SmallPtrSet.h"
3209467b48Spatrick #include "llvm/ADT/SmallVector.h"
3309467b48Spatrick #include "llvm/ADT/Twine.h"
3409467b48Spatrick #include "llvm/ADT/ilist.h"
3509467b48Spatrick #include "llvm/ADT/ilist_node.h"
36*d415bd75Srobert #include "llvm/Analysis/LoopInfo.h"
3709467b48Spatrick #include "llvm/Analysis/VectorUtils.h"
38*d415bd75Srobert #include "llvm/IR/DebugLoc.h"
39*d415bd75Srobert #include "llvm/IR/FMF.h"
40*d415bd75Srobert #include "llvm/Transforms/Utils/LoopVersioning.h"
4109467b48Spatrick #include <algorithm>
4209467b48Spatrick #include <cassert>
4309467b48Spatrick #include <cstddef>
4409467b48Spatrick #include <string>
4509467b48Spatrick 
4609467b48Spatrick namespace llvm {
4709467b48Spatrick 
4809467b48Spatrick class BasicBlock;
4909467b48Spatrick class DominatorTree;
50*d415bd75Srobert class InductionDescriptor;
5109467b48Spatrick class InnerLoopVectorizer;
52*d415bd75Srobert class IRBuilderBase;
5309467b48Spatrick class LoopInfo;
54*d415bd75Srobert class PredicateScalarEvolution;
5509467b48Spatrick class raw_ostream;
5673471bf0Spatrick class RecurrenceDescriptor;
57*d415bd75Srobert class SCEV;
58*d415bd75Srobert class Type;
5909467b48Spatrick class VPBasicBlock;
6009467b48Spatrick class VPRegionBlock;
6109467b48Spatrick class VPlan;
62*d415bd75Srobert class VPReplicateRecipe;
6309467b48Spatrick class VPlanSlp;
64*d415bd75Srobert class Value;
65*d415bd75Srobert 
66*d415bd75Srobert namespace Intrinsic {
67*d415bd75Srobert typedef unsigned ID;
68*d415bd75Srobert }
6909467b48Spatrick 
7073471bf0Spatrick /// Returns a calculation for the total number of elements for a given \p VF.
7173471bf0Spatrick /// For fixed width vectors this value is a constant, whereas for scalable
7273471bf0Spatrick /// vectors it is an expression determined at runtime.
73*d415bd75Srobert Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
74*d415bd75Srobert 
75*d415bd75Srobert /// Return a value for Step multiplied by VF.
76*d415bd75Srobert Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
77*d415bd75Srobert                        int64_t Step);
78*d415bd75Srobert 
79*d415bd75Srobert const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE);
8073471bf0Spatrick 
8109467b48Spatrick /// A range of powers-of-2 vectorization factors with fixed start and
8209467b48Spatrick /// adjustable end. The range includes start and excludes end, e.g.,:
8309467b48Spatrick /// [1, 9) = {1, 2, 4, 8}
8409467b48Spatrick struct VFRange {
8509467b48Spatrick   // A power of 2.
8673471bf0Spatrick   const ElementCount Start;
8709467b48Spatrick 
8809467b48Spatrick   // Need not be a power of 2. If End <= Start range is empty.
8973471bf0Spatrick   ElementCount End;
9073471bf0Spatrick 
isEmptyVFRange9173471bf0Spatrick   bool isEmpty() const {
9273471bf0Spatrick     return End.getKnownMinValue() <= Start.getKnownMinValue();
9373471bf0Spatrick   }
9473471bf0Spatrick 
VFRangeVFRange9573471bf0Spatrick   VFRange(const ElementCount &Start, const ElementCount &End)
9673471bf0Spatrick       : Start(Start), End(End) {
9773471bf0Spatrick     assert(Start.isScalable() == End.isScalable() &&
9873471bf0Spatrick            "Both Start and End should have the same scalable flag");
9973471bf0Spatrick     assert(isPowerOf2_32(Start.getKnownMinValue()) &&
10073471bf0Spatrick            "Expected Start to be a power of 2");
10173471bf0Spatrick   }
10209467b48Spatrick };
10309467b48Spatrick 
10409467b48Spatrick using VPlanPtr = std::unique_ptr<VPlan>;
10509467b48Spatrick 
10609467b48Spatrick /// In what follows, the term "input IR" refers to code that is fed into the
10709467b48Spatrick /// vectorizer whereas the term "output IR" refers to code that is generated by
10809467b48Spatrick /// the vectorizer.
10909467b48Spatrick 
11073471bf0Spatrick /// VPLane provides a way to access lanes in both fixed width and scalable
11173471bf0Spatrick /// vectors, where for the latter the lane index sometimes needs calculating
11273471bf0Spatrick /// as a runtime expression.
11373471bf0Spatrick class VPLane {
11473471bf0Spatrick public:
11573471bf0Spatrick   /// Kind describes how to interpret Lane.
11673471bf0Spatrick   enum class Kind : uint8_t {
11773471bf0Spatrick     /// For First, Lane is the index into the first N elements of a
11873471bf0Spatrick     /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
11973471bf0Spatrick     First,
12073471bf0Spatrick     /// For ScalableLast, Lane is the offset from the start of the last
12173471bf0Spatrick     /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
12273471bf0Spatrick     /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
12373471bf0Spatrick     /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
12473471bf0Spatrick     ScalableLast
12573471bf0Spatrick   };
12673471bf0Spatrick 
12773471bf0Spatrick private:
12873471bf0Spatrick   /// in [0..VF)
12973471bf0Spatrick   unsigned Lane;
13073471bf0Spatrick 
13173471bf0Spatrick   /// Indicates how the Lane should be interpreted, as described above.
13273471bf0Spatrick   Kind LaneKind;
13373471bf0Spatrick 
13473471bf0Spatrick public:
VPLane(unsigned Lane,Kind LaneKind)13573471bf0Spatrick   VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
13673471bf0Spatrick 
getFirstLane()13773471bf0Spatrick   static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }
13873471bf0Spatrick 
getLastLaneForVF(const ElementCount & VF)13973471bf0Spatrick   static VPLane getLastLaneForVF(const ElementCount &VF) {
14073471bf0Spatrick     unsigned LaneOffset = VF.getKnownMinValue() - 1;
14173471bf0Spatrick     Kind LaneKind;
14273471bf0Spatrick     if (VF.isScalable())
14373471bf0Spatrick       // In this case 'LaneOffset' refers to the offset from the start of the
14473471bf0Spatrick       // last subvector with VF.getKnownMinValue() elements.
14573471bf0Spatrick       LaneKind = VPLane::Kind::ScalableLast;
14673471bf0Spatrick     else
14773471bf0Spatrick       LaneKind = VPLane::Kind::First;
14873471bf0Spatrick     return VPLane(LaneOffset, LaneKind);
14973471bf0Spatrick   }
15073471bf0Spatrick 
15173471bf0Spatrick   /// Returns a compile-time known value for the lane index and asserts if the
15273471bf0Spatrick   /// lane can only be calculated at runtime.
getKnownLane()15373471bf0Spatrick   unsigned getKnownLane() const {
15473471bf0Spatrick     assert(LaneKind == Kind::First);
15573471bf0Spatrick     return Lane;
15673471bf0Spatrick   }
15773471bf0Spatrick 
15873471bf0Spatrick   /// Returns an expression describing the lane index that can be used at
15973471bf0Spatrick   /// runtime.
160*d415bd75Srobert   Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
16173471bf0Spatrick 
16273471bf0Spatrick   /// Returns the Kind of lane offset.
getKind()16373471bf0Spatrick   Kind getKind() const { return LaneKind; }
16473471bf0Spatrick 
16573471bf0Spatrick   /// Returns true if this is the first lane of the whole vector.
isFirstLane()16673471bf0Spatrick   bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
16773471bf0Spatrick 
16873471bf0Spatrick   /// Maps the lane to a cache index based on \p VF.
mapToCacheIndex(const ElementCount & VF)16973471bf0Spatrick   unsigned mapToCacheIndex(const ElementCount &VF) const {
17073471bf0Spatrick     switch (LaneKind) {
17173471bf0Spatrick     case VPLane::Kind::ScalableLast:
17273471bf0Spatrick       assert(VF.isScalable() && Lane < VF.getKnownMinValue());
17373471bf0Spatrick       return VF.getKnownMinValue() + Lane;
17473471bf0Spatrick     default:
17573471bf0Spatrick       assert(Lane < VF.getKnownMinValue());
17673471bf0Spatrick       return Lane;
17773471bf0Spatrick     }
17873471bf0Spatrick   }
17973471bf0Spatrick 
18073471bf0Spatrick   /// Returns the maxmimum number of lanes that we are able to consider
18173471bf0Spatrick   /// caching for \p VF.
getNumCachedLanes(const ElementCount & VF)18273471bf0Spatrick   static unsigned getNumCachedLanes(const ElementCount &VF) {
18373471bf0Spatrick     return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
18473471bf0Spatrick   }
18573471bf0Spatrick };
18673471bf0Spatrick 
18709467b48Spatrick /// VPIteration represents a single point in the iteration space of the output
18809467b48Spatrick /// (vectorized and/or unrolled) IR loop.
18909467b48Spatrick struct VPIteration {
19009467b48Spatrick   /// in [0..UF)
19109467b48Spatrick   unsigned Part;
19209467b48Spatrick 
19373471bf0Spatrick   VPLane Lane;
19409467b48Spatrick 
19573471bf0Spatrick   VPIteration(unsigned Part, unsigned Lane,
19673471bf0Spatrick               VPLane::Kind Kind = VPLane::Kind::First)
PartVPIteration19773471bf0Spatrick       : Part(Part), Lane(Lane, Kind) {}
19809467b48Spatrick 
VPIterationVPIteration19973471bf0Spatrick   VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}
20009467b48Spatrick 
isFirstIterationVPIteration20173471bf0Spatrick   bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
20209467b48Spatrick };
20309467b48Spatrick 
20409467b48Spatrick /// VPTransformState holds information passed down when "executing" a VPlan,
20509467b48Spatrick /// needed for generating the output IR.
20609467b48Spatrick struct VPTransformState {
VPTransformStateVPTransformState20773471bf0Spatrick   VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
208*d415bd75Srobert                    DominatorTree *DT, IRBuilderBase &Builder,
20973471bf0Spatrick                    InnerLoopVectorizer *ILV, VPlan *Plan)
210*d415bd75Srobert       : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan),
211*d415bd75Srobert         LVer(nullptr) {}
21209467b48Spatrick 
21309467b48Spatrick   /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
21473471bf0Spatrick   ElementCount VF;
21509467b48Spatrick   unsigned UF;
21609467b48Spatrick 
21709467b48Spatrick   /// Hold the indices to generate specific scalar instructions. Null indicates
21809467b48Spatrick   /// that all instances are to be generated, using either scalar or vector
21909467b48Spatrick   /// instructions.
220*d415bd75Srobert   std::optional<VPIteration> Instance;
22109467b48Spatrick 
22209467b48Spatrick   struct DataState {
22309467b48Spatrick     /// A type for vectorized values in the new loop. Each value from the
22409467b48Spatrick     /// original loop, when vectorized, is represented by UF vector values in
22509467b48Spatrick     /// the new unrolled loop, where UF is the unroll factor.
22609467b48Spatrick     typedef SmallVector<Value *, 2> PerPartValuesTy;
22709467b48Spatrick 
22809467b48Spatrick     DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;
22973471bf0Spatrick 
23073471bf0Spatrick     using ScalarsPerPartValuesTy = SmallVector<SmallVector<Value *, 4>, 2>;
23173471bf0Spatrick     DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars;
23209467b48Spatrick   } Data;
23309467b48Spatrick 
23409467b48Spatrick   /// Get the generated Value for a given VPValue and a given Part. Note that
23509467b48Spatrick   /// as some Defs are still created by ILV and managed in its ValueMap, this
23609467b48Spatrick   /// method will delegate the call to ILV in such cases in order to provide
23709467b48Spatrick   /// callers a consistent API.
23809467b48Spatrick   /// \see set.
23973471bf0Spatrick   Value *get(VPValue *Def, unsigned Part);
24009467b48Spatrick 
241097a140dSpatrick   /// Get the generated Value for a given VPValue and given Part and Lane.
24273471bf0Spatrick   Value *get(VPValue *Def, const VPIteration &Instance);
24373471bf0Spatrick 
hasVectorValueVPTransformState24473471bf0Spatrick   bool hasVectorValue(VPValue *Def, unsigned Part) {
24573471bf0Spatrick     auto I = Data.PerPartOutput.find(Def);
24673471bf0Spatrick     return I != Data.PerPartOutput.end() && Part < I->second.size() &&
24773471bf0Spatrick            I->second[Part];
248097a140dSpatrick   }
249097a140dSpatrick 
hasAnyVectorValueVPTransformState25073471bf0Spatrick   bool hasAnyVectorValue(VPValue *Def) const {
25173471bf0Spatrick     return Data.PerPartOutput.find(Def) != Data.PerPartOutput.end();
25273471bf0Spatrick   }
25373471bf0Spatrick 
hasScalarValueVPTransformState25473471bf0Spatrick   bool hasScalarValue(VPValue *Def, VPIteration Instance) {
25573471bf0Spatrick     auto I = Data.PerPartScalars.find(Def);
25673471bf0Spatrick     if (I == Data.PerPartScalars.end())
25773471bf0Spatrick       return false;
25873471bf0Spatrick     unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
25973471bf0Spatrick     return Instance.Part < I->second.size() &&
26073471bf0Spatrick            CacheIdx < I->second[Instance.Part].size() &&
26173471bf0Spatrick            I->second[Instance.Part][CacheIdx];
26209467b48Spatrick   }
26309467b48Spatrick 
26409467b48Spatrick   /// Set the generated Value for a given VPValue and a given Part.
setVPTransformState26509467b48Spatrick   void set(VPValue *Def, Value *V, unsigned Part) {
26609467b48Spatrick     if (!Data.PerPartOutput.count(Def)) {
26709467b48Spatrick       DataState::PerPartValuesTy Entry(UF);
26809467b48Spatrick       Data.PerPartOutput[Def] = Entry;
26909467b48Spatrick     }
27009467b48Spatrick     Data.PerPartOutput[Def][Part] = V;
27109467b48Spatrick   }
27273471bf0Spatrick   /// Reset an existing vector value for \p Def and a given \p Part.
resetVPTransformState27373471bf0Spatrick   void reset(VPValue *Def, Value *V, unsigned Part) {
27473471bf0Spatrick     auto Iter = Data.PerPartOutput.find(Def);
27573471bf0Spatrick     assert(Iter != Data.PerPartOutput.end() &&
27673471bf0Spatrick            "need to overwrite existing value");
27773471bf0Spatrick     Iter->second[Part] = V;
27873471bf0Spatrick   }
27973471bf0Spatrick 
28073471bf0Spatrick   /// Set the generated scalar \p V for \p Def and the given \p Instance.
setVPTransformState28173471bf0Spatrick   void set(VPValue *Def, Value *V, const VPIteration &Instance) {
28273471bf0Spatrick     auto Iter = Data.PerPartScalars.insert({Def, {}});
28373471bf0Spatrick     auto &PerPartVec = Iter.first->second;
28473471bf0Spatrick     while (PerPartVec.size() <= Instance.Part)
28573471bf0Spatrick       PerPartVec.emplace_back();
28673471bf0Spatrick     auto &Scalars = PerPartVec[Instance.Part];
28773471bf0Spatrick     unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
28873471bf0Spatrick     while (Scalars.size() <= CacheIdx)
28973471bf0Spatrick       Scalars.push_back(nullptr);
29073471bf0Spatrick     assert(!Scalars[CacheIdx] && "should overwrite existing value");
29173471bf0Spatrick     Scalars[CacheIdx] = V;
29273471bf0Spatrick   }
29373471bf0Spatrick 
29473471bf0Spatrick   /// Reset an existing scalar value for \p Def and a given \p Instance.
resetVPTransformState29573471bf0Spatrick   void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
29673471bf0Spatrick     auto Iter = Data.PerPartScalars.find(Def);
29773471bf0Spatrick     assert(Iter != Data.PerPartScalars.end() &&
29873471bf0Spatrick            "need to overwrite existing value");
29973471bf0Spatrick     assert(Instance.Part < Iter->second.size() &&
30073471bf0Spatrick            "need to overwrite existing value");
30173471bf0Spatrick     unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
30273471bf0Spatrick     assert(CacheIdx < Iter->second[Instance.Part].size() &&
30373471bf0Spatrick            "need to overwrite existing value");
30473471bf0Spatrick     Iter->second[Instance.Part][CacheIdx] = V;
30573471bf0Spatrick   }
30609467b48Spatrick 
307*d415bd75Srobert   /// Add additional metadata to \p To that was not present on \p Orig.
308*d415bd75Srobert   ///
309*d415bd75Srobert   /// Currently this is used to add the noalias annotations based on the
310*d415bd75Srobert   /// inserted memchecks.  Use this for instructions that are *cloned* into the
311*d415bd75Srobert   /// vector loop.
312*d415bd75Srobert   void addNewMetadata(Instruction *To, const Instruction *Orig);
313*d415bd75Srobert 
314*d415bd75Srobert   /// Add metadata from one instruction to another.
315*d415bd75Srobert   ///
316*d415bd75Srobert   /// This includes both the original MDs from \p From and additional ones (\see
317*d415bd75Srobert   /// addNewMetadata).  Use this for *newly created* instructions in the vector
318*d415bd75Srobert   /// loop.
319*d415bd75Srobert   void addMetadata(Instruction *To, Instruction *From);
320*d415bd75Srobert 
321*d415bd75Srobert   /// Similar to the previous function but it adds the metadata to a
322*d415bd75Srobert   /// vector of instructions.
323*d415bd75Srobert   void addMetadata(ArrayRef<Value *> To, Instruction *From);
324*d415bd75Srobert 
325*d415bd75Srobert   /// Set the debug location in the builder using the debug location in \p V.
326*d415bd75Srobert   void setDebugLocFromInst(const Value *V);
327*d415bd75Srobert 
32809467b48Spatrick   /// Hold state information used when constructing the CFG of the output IR,
32909467b48Spatrick   /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
33009467b48Spatrick   struct CFGState {
33109467b48Spatrick     /// The previous VPBasicBlock visited. Initially set to null.
33209467b48Spatrick     VPBasicBlock *PrevVPBB = nullptr;
33309467b48Spatrick 
33409467b48Spatrick     /// The previous IR BasicBlock created or used. Initially set to the new
33509467b48Spatrick     /// header BasicBlock.
33609467b48Spatrick     BasicBlock *PrevBB = nullptr;
33709467b48Spatrick 
338*d415bd75Srobert     /// The last IR BasicBlock in the output IR. Set to the exit block of the
339*d415bd75Srobert     /// vector loop.
340*d415bd75Srobert     BasicBlock *ExitBB = nullptr;
34173471bf0Spatrick 
34209467b48Spatrick     /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
34309467b48Spatrick     /// of replication, maps the BasicBlock of the last replica created.
34409467b48Spatrick     SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
34509467b48Spatrick 
34609467b48Spatrick     CFGState() = default;
347*d415bd75Srobert 
348*d415bd75Srobert     /// Returns the BasicBlock* mapped to the pre-header of the loop region
349*d415bd75Srobert     /// containing \p R.
350*d415bd75Srobert     BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
35109467b48Spatrick   } CFG;
35209467b48Spatrick 
35309467b48Spatrick   /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
35409467b48Spatrick   LoopInfo *LI;
35509467b48Spatrick 
35609467b48Spatrick   /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
35709467b48Spatrick   DominatorTree *DT;
35809467b48Spatrick 
35909467b48Spatrick   /// Hold a reference to the IRBuilder used to generate output IR code.
360*d415bd75Srobert   IRBuilderBase &Builder;
36109467b48Spatrick 
36209467b48Spatrick   VPValue2ValueTy VPValue2Value;
36309467b48Spatrick 
364097a140dSpatrick   /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
365097a140dSpatrick   Value *CanonicalIV = nullptr;
366097a140dSpatrick 
36709467b48Spatrick   /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
36809467b48Spatrick   InnerLoopVectorizer *ILV;
36909467b48Spatrick 
37073471bf0Spatrick   /// Pointer to the VPlan code is generated for.
37173471bf0Spatrick   VPlan *Plan;
37273471bf0Spatrick 
373*d415bd75Srobert   /// Holds recipes that may generate a poison value that is used after
374*d415bd75Srobert   /// vectorization, even when their operands are not poison.
375*d415bd75Srobert   SmallPtrSet<VPRecipeBase *, 16> MayGeneratePoisonRecipes;
37673471bf0Spatrick 
377*d415bd75Srobert   /// The loop object for the current parent region, or nullptr.
378*d415bd75Srobert   Loop *CurrentVectorLoop = nullptr;
37973471bf0Spatrick 
380*d415bd75Srobert   /// LoopVersioning.  It's only set up (non-null) if memchecks were
381*d415bd75Srobert   /// used.
382*d415bd75Srobert   ///
383*d415bd75Srobert   /// This is currently only used to add no-alias metadata based on the
384*d415bd75Srobert   /// memchecks.  The actually versioning is performed manually.
385*d415bd75Srobert   std::unique_ptr<LoopVersioning> LVer;
38609467b48Spatrick };
38709467b48Spatrick 
38809467b48Spatrick /// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
38909467b48Spatrick /// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
39009467b48Spatrick class VPBlockBase {
39109467b48Spatrick   friend class VPBlockUtils;
39209467b48Spatrick 
39309467b48Spatrick   const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
39409467b48Spatrick 
39509467b48Spatrick   /// An optional name for the block.
39609467b48Spatrick   std::string Name;
39709467b48Spatrick 
39809467b48Spatrick   /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
39909467b48Spatrick   /// it is a topmost VPBlockBase.
40009467b48Spatrick   VPRegionBlock *Parent = nullptr;
40109467b48Spatrick 
40209467b48Spatrick   /// List of predecessor blocks.
40309467b48Spatrick   SmallVector<VPBlockBase *, 1> Predecessors;
40409467b48Spatrick 
40509467b48Spatrick   /// List of successor blocks.
40609467b48Spatrick   SmallVector<VPBlockBase *, 1> Successors;
40709467b48Spatrick 
408097a140dSpatrick   /// VPlan containing the block. Can only be set on the entry block of the
409097a140dSpatrick   /// plan.
410097a140dSpatrick   VPlan *Plan = nullptr;
411097a140dSpatrick 
41209467b48Spatrick   /// Add \p Successor as the last successor to this block.
appendSuccessor(VPBlockBase * Successor)41309467b48Spatrick   void appendSuccessor(VPBlockBase *Successor) {
41409467b48Spatrick     assert(Successor && "Cannot add nullptr successor!");
41509467b48Spatrick     Successors.push_back(Successor);
41609467b48Spatrick   }
41709467b48Spatrick 
41809467b48Spatrick   /// Add \p Predecessor as the last predecessor to this block.
appendPredecessor(VPBlockBase * Predecessor)41909467b48Spatrick   void appendPredecessor(VPBlockBase *Predecessor) {
42009467b48Spatrick     assert(Predecessor && "Cannot add nullptr predecessor!");
42109467b48Spatrick     Predecessors.push_back(Predecessor);
42209467b48Spatrick   }
42309467b48Spatrick 
42409467b48Spatrick   /// Remove \p Predecessor from the predecessors of this block.
removePredecessor(VPBlockBase * Predecessor)42509467b48Spatrick   void removePredecessor(VPBlockBase *Predecessor) {
42673471bf0Spatrick     auto Pos = find(Predecessors, Predecessor);
42709467b48Spatrick     assert(Pos && "Predecessor does not exist");
42809467b48Spatrick     Predecessors.erase(Pos);
42909467b48Spatrick   }
43009467b48Spatrick 
43109467b48Spatrick   /// Remove \p Successor from the successors of this block.
removeSuccessor(VPBlockBase * Successor)43209467b48Spatrick   void removeSuccessor(VPBlockBase *Successor) {
43373471bf0Spatrick     auto Pos = find(Successors, Successor);
43409467b48Spatrick     assert(Pos && "Successor does not exist");
43509467b48Spatrick     Successors.erase(Pos);
43609467b48Spatrick   }
43709467b48Spatrick 
43809467b48Spatrick protected:
VPBlockBase(const unsigned char SC,const std::string & N)43909467b48Spatrick   VPBlockBase(const unsigned char SC, const std::string &N)
44009467b48Spatrick       : SubclassID(SC), Name(N) {}
44109467b48Spatrick 
44209467b48Spatrick public:
44309467b48Spatrick   /// An enumeration for keeping track of the concrete subclass of VPBlockBase
44409467b48Spatrick   /// that are actually instantiated. Values of this enumeration are kept in the
44509467b48Spatrick   /// SubclassID field of the VPBlockBase objects. They are used for concrete
44609467b48Spatrick   /// type identification.
44709467b48Spatrick   using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };
44809467b48Spatrick 
44909467b48Spatrick   using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;
45009467b48Spatrick 
45109467b48Spatrick   virtual ~VPBlockBase() = default;
45209467b48Spatrick 
getName()45309467b48Spatrick   const std::string &getName() const { return Name; }
45409467b48Spatrick 
setName(const Twine & newName)45509467b48Spatrick   void setName(const Twine &newName) { Name = newName.str(); }
45609467b48Spatrick 
45709467b48Spatrick   /// \return an ID for the concrete type of this object.
45809467b48Spatrick   /// This is used to implement the classof checks. This should not be used
45909467b48Spatrick   /// for any other purpose, as the values may change as LLVM evolves.
getVPBlockID()46009467b48Spatrick   unsigned getVPBlockID() const { return SubclassID; }
46109467b48Spatrick 
getParent()46209467b48Spatrick   VPRegionBlock *getParent() { return Parent; }
getParent()46309467b48Spatrick   const VPRegionBlock *getParent() const { return Parent; }
46409467b48Spatrick 
465097a140dSpatrick   /// \return A pointer to the plan containing the current block.
466097a140dSpatrick   VPlan *getPlan();
467097a140dSpatrick   const VPlan *getPlan() const;
468097a140dSpatrick 
469097a140dSpatrick   /// Sets the pointer of the plan containing the block. The block must be the
470097a140dSpatrick   /// entry block into the VPlan.
471097a140dSpatrick   void setPlan(VPlan *ParentPlan);
472097a140dSpatrick 
setParent(VPRegionBlock * P)47309467b48Spatrick   void setParent(VPRegionBlock *P) { Parent = P; }
47409467b48Spatrick 
47509467b48Spatrick   /// \return the VPBasicBlock that is the entry of this VPBlockBase,
47609467b48Spatrick   /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
47709467b48Spatrick   /// VPBlockBase is a VPBasicBlock, it is returned.
47809467b48Spatrick   const VPBasicBlock *getEntryBasicBlock() const;
47909467b48Spatrick   VPBasicBlock *getEntryBasicBlock();
48009467b48Spatrick 
481*d415bd75Srobert   /// \return the VPBasicBlock that is the exiting this VPBlockBase,
48209467b48Spatrick   /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
48309467b48Spatrick   /// VPBlockBase is a VPBasicBlock, it is returned.
484*d415bd75Srobert   const VPBasicBlock *getExitingBasicBlock() const;
485*d415bd75Srobert   VPBasicBlock *getExitingBasicBlock();
48609467b48Spatrick 
getSuccessors()48709467b48Spatrick   const VPBlocksTy &getSuccessors() const { return Successors; }
getSuccessors()48809467b48Spatrick   VPBlocksTy &getSuccessors() { return Successors; }
48909467b48Spatrick 
successors()490*d415bd75Srobert   iterator_range<VPBlockBase **> successors() { return Successors; }
491*d415bd75Srobert 
getPredecessors()49209467b48Spatrick   const VPBlocksTy &getPredecessors() const { return Predecessors; }
getPredecessors()49309467b48Spatrick   VPBlocksTy &getPredecessors() { return Predecessors; }
49409467b48Spatrick 
49509467b48Spatrick   /// \return the successor of this VPBlockBase if it has a single successor.
49609467b48Spatrick   /// Otherwise return a null pointer.
getSingleSuccessor()49709467b48Spatrick   VPBlockBase *getSingleSuccessor() const {
49809467b48Spatrick     return (Successors.size() == 1 ? *Successors.begin() : nullptr);
49909467b48Spatrick   }
50009467b48Spatrick 
50109467b48Spatrick   /// \return the predecessor of this VPBlockBase if it has a single
50209467b48Spatrick   /// predecessor. Otherwise return a null pointer.
getSinglePredecessor()50309467b48Spatrick   VPBlockBase *getSinglePredecessor() const {
50409467b48Spatrick     return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
50509467b48Spatrick   }
50609467b48Spatrick 
getNumSuccessors()50709467b48Spatrick   size_t getNumSuccessors() const { return Successors.size(); }
getNumPredecessors()50809467b48Spatrick   size_t getNumPredecessors() const { return Predecessors.size(); }
50909467b48Spatrick 
51009467b48Spatrick   /// An Enclosing Block of a block B is any block containing B, including B
51109467b48Spatrick   /// itself. \return the closest enclosing block starting from "this", which
51209467b48Spatrick   /// has successors. \return the root enclosing block if all enclosing blocks
51309467b48Spatrick   /// have no successors.
51409467b48Spatrick   VPBlockBase *getEnclosingBlockWithSuccessors();
51509467b48Spatrick 
51609467b48Spatrick   /// \return the closest enclosing block starting from "this", which has
51709467b48Spatrick   /// predecessors. \return the root enclosing block if all enclosing blocks
51809467b48Spatrick   /// have no predecessors.
51909467b48Spatrick   VPBlockBase *getEnclosingBlockWithPredecessors();
52009467b48Spatrick 
52109467b48Spatrick   /// \return the successors either attached directly to this VPBlockBase or, if
52209467b48Spatrick   /// this VPBlockBase is the exit block of a VPRegionBlock and has no
52309467b48Spatrick   /// successors of its own, search recursively for the first enclosing
52409467b48Spatrick   /// VPRegionBlock that has successors and return them. If no such
52509467b48Spatrick   /// VPRegionBlock exists, return the (empty) successors of the topmost
52609467b48Spatrick   /// VPBlockBase reached.
getHierarchicalSuccessors()52709467b48Spatrick   const VPBlocksTy &getHierarchicalSuccessors() {
52809467b48Spatrick     return getEnclosingBlockWithSuccessors()->getSuccessors();
52909467b48Spatrick   }
53009467b48Spatrick 
53109467b48Spatrick   /// \return the hierarchical successor of this VPBlockBase if it has a single
53209467b48Spatrick   /// hierarchical successor. Otherwise return a null pointer.
getSingleHierarchicalSuccessor()53309467b48Spatrick   VPBlockBase *getSingleHierarchicalSuccessor() {
53409467b48Spatrick     return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
53509467b48Spatrick   }
53609467b48Spatrick 
53709467b48Spatrick   /// \return the predecessors either attached directly to this VPBlockBase or,
53809467b48Spatrick   /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
53909467b48Spatrick   /// predecessors of its own, search recursively for the first enclosing
54009467b48Spatrick   /// VPRegionBlock that has predecessors and return them. If no such
54109467b48Spatrick   /// VPRegionBlock exists, return the (empty) predecessors of the topmost
54209467b48Spatrick   /// VPBlockBase reached.
getHierarchicalPredecessors()54309467b48Spatrick   const VPBlocksTy &getHierarchicalPredecessors() {
54409467b48Spatrick     return getEnclosingBlockWithPredecessors()->getPredecessors();
54509467b48Spatrick   }
54609467b48Spatrick 
54709467b48Spatrick   /// \return the hierarchical predecessor of this VPBlockBase if it has a
54809467b48Spatrick   /// single hierarchical predecessor. Otherwise return a null pointer.
getSingleHierarchicalPredecessor()54909467b48Spatrick   VPBlockBase *getSingleHierarchicalPredecessor() {
55009467b48Spatrick     return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
55109467b48Spatrick   }
55209467b48Spatrick 
55309467b48Spatrick   /// Set a given VPBlockBase \p Successor as the single successor of this
55409467b48Spatrick   /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
55509467b48Spatrick   /// This VPBlockBase must have no successors.
setOneSuccessor(VPBlockBase * Successor)55609467b48Spatrick   void setOneSuccessor(VPBlockBase *Successor) {
55709467b48Spatrick     assert(Successors.empty() && "Setting one successor when others exist.");
55809467b48Spatrick     appendSuccessor(Successor);
55909467b48Spatrick   }
56009467b48Spatrick 
56109467b48Spatrick   /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
562*d415bd75Srobert   /// successors of this VPBlockBase. This VPBlockBase is not added as
563*d415bd75Srobert   /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
564*d415bd75Srobert   /// successors.
setTwoSuccessors(VPBlockBase * IfTrue,VPBlockBase * IfFalse)565*d415bd75Srobert   void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
56609467b48Spatrick     assert(Successors.empty() && "Setting two successors when others exist.");
56709467b48Spatrick     appendSuccessor(IfTrue);
56809467b48Spatrick     appendSuccessor(IfFalse);
56909467b48Spatrick   }
57009467b48Spatrick 
57109467b48Spatrick   /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
57209467b48Spatrick   /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
57309467b48Spatrick   /// as successor of any VPBasicBlock in \p NewPreds.
setPredecessors(ArrayRef<VPBlockBase * > NewPreds)57409467b48Spatrick   void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
57509467b48Spatrick     assert(Predecessors.empty() && "Block predecessors already set.");
57609467b48Spatrick     for (auto *Pred : NewPreds)
57709467b48Spatrick       appendPredecessor(Pred);
57809467b48Spatrick   }
57909467b48Spatrick 
58009467b48Spatrick   /// Remove all the predecessor of this block.
clearPredecessors()58109467b48Spatrick   void clearPredecessors() { Predecessors.clear(); }
58209467b48Spatrick 
583*d415bd75Srobert   /// Remove all the successors of this block.
clearSuccessors()584*d415bd75Srobert   void clearSuccessors() { Successors.clear(); }
58509467b48Spatrick 
58609467b48Spatrick   /// The method which generates the output IR that correspond to this
58709467b48Spatrick   /// VPBlockBase, thereby "executing" the VPlan.
588*d415bd75Srobert   virtual void execute(VPTransformState *State) = 0;
58909467b48Spatrick 
59009467b48Spatrick   /// Delete all blocks reachable from a given VPBlockBase, inclusive.
59109467b48Spatrick   static void deleteCFG(VPBlockBase *Entry);
59209467b48Spatrick 
59309467b48Spatrick   /// Return true if it is legal to hoist instructions into this block.
isLegalToHoistInto()59409467b48Spatrick   bool isLegalToHoistInto() {
59509467b48Spatrick     // There are currently no constraints that prevent an instruction to be
59609467b48Spatrick     // hoisted into a VPBlockBase.
59709467b48Spatrick     return true;
59809467b48Spatrick   }
59973471bf0Spatrick 
60073471bf0Spatrick   /// Replace all operands of VPUsers in the block with \p NewValue and also
60173471bf0Spatrick   /// replaces all uses of VPValues defined in the block with NewValue.
60273471bf0Spatrick   virtual void dropAllReferences(VPValue *NewValue) = 0;
60373471bf0Spatrick 
60473471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
printAsOperand(raw_ostream & OS,bool PrintType)60573471bf0Spatrick   void printAsOperand(raw_ostream &OS, bool PrintType) const {
60673471bf0Spatrick     OS << getName();
60773471bf0Spatrick   }
60873471bf0Spatrick 
60973471bf0Spatrick   /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
61073471bf0Spatrick   /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
61173471bf0Spatrick   /// consequtive numbers.
61273471bf0Spatrick   ///
61373471bf0Spatrick   /// Note that the numbering is applied to the whole VPlan, so printing
61473471bf0Spatrick   /// individual blocks is consistent with the whole VPlan printing.
61573471bf0Spatrick   virtual void print(raw_ostream &O, const Twine &Indent,
61673471bf0Spatrick                      VPSlotTracker &SlotTracker) const = 0;
61773471bf0Spatrick 
61873471bf0Spatrick   /// Print plain-text dump of this VPlan to \p O.
print(raw_ostream & O)61973471bf0Spatrick   void print(raw_ostream &O) const {
62073471bf0Spatrick     VPSlotTracker SlotTracker(getPlan());
62173471bf0Spatrick     print(O, "", SlotTracker);
62273471bf0Spatrick   }
62373471bf0Spatrick 
62473471bf0Spatrick   /// Print the successors of this block to \p O, prefixing all lines with \p
62573471bf0Spatrick   /// Indent.
62673471bf0Spatrick   void printSuccessors(raw_ostream &O, const Twine &Indent) const;
62773471bf0Spatrick 
62873471bf0Spatrick   /// Dump this VPBlockBase to dbgs().
dump()62973471bf0Spatrick   LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
63073471bf0Spatrick #endif
63109467b48Spatrick };
63209467b48Spatrick 
633*d415bd75Srobert /// A value that is used outside the VPlan. The operand of the user needs to be
634*d415bd75Srobert /// added to the associated LCSSA phi node.
635*d415bd75Srobert class VPLiveOut : public VPUser {
636*d415bd75Srobert   PHINode *Phi;
637*d415bd75Srobert 
638*d415bd75Srobert public:
VPLiveOut(PHINode * Phi,VPValue * Op)639*d415bd75Srobert   VPLiveOut(PHINode *Phi, VPValue *Op)
640*d415bd75Srobert       : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
641*d415bd75Srobert 
642*d415bd75Srobert   /// Fixup the wrapped LCSSA phi node in the unique exit block.  This simply
643*d415bd75Srobert   /// means we need to add the appropriate incoming value from the middle
644*d415bd75Srobert   /// block as exiting edges from the scalar epilogue loop (if present) are
645*d415bd75Srobert   /// already in place, and we exit the vector loop exclusively to the middle
646*d415bd75Srobert   /// block.
647*d415bd75Srobert   void fixPhi(VPlan &Plan, VPTransformState &State);
648*d415bd75Srobert 
649*d415bd75Srobert   /// Returns true if the VPLiveOut uses scalars of operand \p Op.
usesScalars(const VPValue * Op)650*d415bd75Srobert   bool usesScalars(const VPValue *Op) const override {
651*d415bd75Srobert     assert(is_contained(operands(), Op) &&
652*d415bd75Srobert            "Op must be an operand of the recipe");
653*d415bd75Srobert     return true;
654*d415bd75Srobert   }
655*d415bd75Srobert 
getPhi()656*d415bd75Srobert   PHINode *getPhi() const { return Phi; }
657*d415bd75Srobert };
658*d415bd75Srobert 
65909467b48Spatrick /// VPRecipeBase is a base class modeling a sequence of one or more output IR
66073471bf0Spatrick /// instructions. VPRecipeBase owns the the VPValues it defines through VPDef
66173471bf0Spatrick /// and is responsible for deleting its defined values. Single-value
66273471bf0Spatrick /// VPRecipeBases that also inherit from VPValue must make sure to inherit from
66373471bf0Spatrick /// VPRecipeBase before VPValue.
66473471bf0Spatrick class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
66573471bf0Spatrick                      public VPDef,
66673471bf0Spatrick                      public VPUser {
66709467b48Spatrick   friend VPBasicBlock;
66809467b48Spatrick   friend class VPBlockUtils;
66909467b48Spatrick 
67009467b48Spatrick   /// Each VPRecipe belongs to a single VPBasicBlock.
67109467b48Spatrick   VPBasicBlock *Parent = nullptr;
67209467b48Spatrick 
67309467b48Spatrick public:
VPRecipeBase(const unsigned char SC,ArrayRef<VPValue * > Operands)67473471bf0Spatrick   VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands)
67573471bf0Spatrick       : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe) {}
67609467b48Spatrick 
67773471bf0Spatrick   template <typename IterT>
VPRecipeBase(const unsigned char SC,iterator_range<IterT> Operands)67873471bf0Spatrick   VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands)
67973471bf0Spatrick       : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe) {}
68009467b48Spatrick   virtual ~VPRecipeBase() = default;
68109467b48Spatrick 
68209467b48Spatrick   /// \return the VPBasicBlock which this VPRecipe belongs to.
getParent()68309467b48Spatrick   VPBasicBlock *getParent() { return Parent; }
getParent()68409467b48Spatrick   const VPBasicBlock *getParent() const { return Parent; }
68509467b48Spatrick 
68609467b48Spatrick   /// The method which generates the output IR instructions that correspond to
68709467b48Spatrick   /// this VPRecipe, thereby "executing" the VPlan.
688*d415bd75Srobert   virtual void execute(VPTransformState &State) = 0;
68909467b48Spatrick 
69009467b48Spatrick   /// Insert an unlinked recipe into a basic block immediately before
69109467b48Spatrick   /// the specified recipe.
69209467b48Spatrick   void insertBefore(VPRecipeBase *InsertPos);
693*d415bd75Srobert   /// Insert an unlinked recipe into \p BB immediately before the insertion
694*d415bd75Srobert   /// point \p IP;
695*d415bd75Srobert   void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
69609467b48Spatrick 
69709467b48Spatrick   /// Insert an unlinked Recipe into a basic block immediately after
69809467b48Spatrick   /// the specified Recipe.
69909467b48Spatrick   void insertAfter(VPRecipeBase *InsertPos);
70009467b48Spatrick 
70109467b48Spatrick   /// Unlink this recipe from its current VPBasicBlock and insert it into
70209467b48Spatrick   /// the VPBasicBlock that MovePos lives in, right after MovePos.
70309467b48Spatrick   void moveAfter(VPRecipeBase *MovePos);
70409467b48Spatrick 
70573471bf0Spatrick   /// Unlink this recipe and insert into BB before I.
70673471bf0Spatrick   ///
70773471bf0Spatrick   /// \pre I is a valid iterator into BB.
70873471bf0Spatrick   void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
70973471bf0Spatrick 
71009467b48Spatrick   /// This method unlinks 'this' from the containing basic block, but does not
71109467b48Spatrick   /// delete it.
71209467b48Spatrick   void removeFromParent();
71309467b48Spatrick 
71409467b48Spatrick   /// This method unlinks 'this' from the containing basic block and deletes it.
71509467b48Spatrick   ///
71609467b48Spatrick   /// \returns an iterator pointing to the element after the erased one
71709467b48Spatrick   iplist<VPRecipeBase>::iterator eraseFromParent();
71873471bf0Spatrick 
71973471bf0Spatrick   /// Returns the underlying instruction, if the recipe is a VPValue or nullptr
72073471bf0Spatrick   /// otherwise.
getUnderlyingInstr()72173471bf0Spatrick   Instruction *getUnderlyingInstr() {
72273471bf0Spatrick     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
72373471bf0Spatrick   }
getUnderlyingInstr()72473471bf0Spatrick   const Instruction *getUnderlyingInstr() const {
72573471bf0Spatrick     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
72673471bf0Spatrick   }
72773471bf0Spatrick 
72873471bf0Spatrick   /// Method to support type inquiry through isa, cast, and dyn_cast.
classof(const VPDef * D)72973471bf0Spatrick   static inline bool classof(const VPDef *D) {
73073471bf0Spatrick     // All VPDefs are also VPRecipeBases.
73173471bf0Spatrick     return true;
73273471bf0Spatrick   }
73373471bf0Spatrick 
classof(const VPUser * U)73473471bf0Spatrick   static inline bool classof(const VPUser *U) {
73573471bf0Spatrick     return U->getVPUserID() == VPUser::VPUserID::Recipe;
73673471bf0Spatrick   }
73773471bf0Spatrick 
73873471bf0Spatrick   /// Returns true if the recipe may have side-effects.
73973471bf0Spatrick   bool mayHaveSideEffects() const;
74073471bf0Spatrick 
74173471bf0Spatrick   /// Returns true for PHI-like recipes.
isPhi()74273471bf0Spatrick   bool isPhi() const {
74373471bf0Spatrick     return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
74473471bf0Spatrick   }
74573471bf0Spatrick 
74673471bf0Spatrick   /// Returns true if the recipe may read from memory.
74773471bf0Spatrick   bool mayReadFromMemory() const;
74873471bf0Spatrick 
74973471bf0Spatrick   /// Returns true if the recipe may write to memory.
75073471bf0Spatrick   bool mayWriteToMemory() const;
75173471bf0Spatrick 
75273471bf0Spatrick   /// Returns true if the recipe may read from or write to memory.
mayReadOrWriteMemory()75373471bf0Spatrick   bool mayReadOrWriteMemory() const {
75473471bf0Spatrick     return mayReadFromMemory() || mayWriteToMemory();
75573471bf0Spatrick   }
75609467b48Spatrick };
75709467b48Spatrick 
758*d415bd75Srobert // Helper macro to define common classof implementations for recipes.
759*d415bd75Srobert #define VP_CLASSOF_IMPL(VPDefID)                                               \
760*d415bd75Srobert   static inline bool classof(const VPDef *D) {                                 \
761*d415bd75Srobert     return D->getVPDefID() == VPDefID;                                         \
762*d415bd75Srobert   }                                                                            \
763*d415bd75Srobert   static inline bool classof(const VPValue *V) {                               \
764*d415bd75Srobert     auto *R = V->getDefiningRecipe();                                          \
765*d415bd75Srobert     return R && R->getVPDefID() == VPDefID;                                    \
766*d415bd75Srobert   }                                                                            \
767*d415bd75Srobert   static inline bool classof(const VPUser *U) {                                \
768*d415bd75Srobert     auto *R = dyn_cast<VPRecipeBase>(U);                                       \
769*d415bd75Srobert     return R && R->getVPDefID() == VPDefID;                                    \
770*d415bd75Srobert   }                                                                            \
771*d415bd75Srobert   static inline bool classof(const VPRecipeBase *R) {                          \
772*d415bd75Srobert     return R->getVPDefID() == VPDefID;                                         \
77373471bf0Spatrick   }
77473471bf0Spatrick 
77509467b48Spatrick /// This is a concrete Recipe that models a single VPlan-level instruction.
77609467b48Spatrick /// While as any Recipe it may generate a sequence of IR instructions when
77709467b48Spatrick /// executed, these instructions would always form a single-def expression as
77809467b48Spatrick /// the VPInstruction is also a single def-use vertex.
77973471bf0Spatrick class VPInstruction : public VPRecipeBase, public VPValue {
78009467b48Spatrick   friend class VPlanSlp;
78109467b48Spatrick 
78209467b48Spatrick public:
78309467b48Spatrick   /// VPlan opcodes, extending LLVM IR with idiomatics instructions.
78409467b48Spatrick   enum {
78573471bf0Spatrick     FirstOrderRecurrenceSplice =
78673471bf0Spatrick         Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
78773471bf0Spatrick                                       // values of a first-order recurrence.
78873471bf0Spatrick     Not,
78909467b48Spatrick     ICmpULE,
79009467b48Spatrick     SLPLoad,
79109467b48Spatrick     SLPStore,
792097a140dSpatrick     ActiveLaneMask,
793*d415bd75Srobert     CanonicalIVIncrement,
794*d415bd75Srobert     CanonicalIVIncrementNUW,
795*d415bd75Srobert     // The next two are similar to the above, but instead increment the
796*d415bd75Srobert     // canonical IV separately for each unrolled part.
797*d415bd75Srobert     CanonicalIVIncrementForPart,
798*d415bd75Srobert     CanonicalIVIncrementForPartNUW,
799*d415bd75Srobert     BranchOnCount,
800*d415bd75Srobert     BranchOnCond
80109467b48Spatrick   };
80209467b48Spatrick 
80309467b48Spatrick private:
80409467b48Spatrick   typedef unsigned char OpcodeTy;
80509467b48Spatrick   OpcodeTy Opcode;
806*d415bd75Srobert   FastMathFlags FMF;
807*d415bd75Srobert   DebugLoc DL;
808*d415bd75Srobert 
809*d415bd75Srobert   /// An optional name that can be used for the generated IR instruction.
810*d415bd75Srobert   const std::string Name;
81109467b48Spatrick 
81209467b48Spatrick   /// Utility method serving execute(): generates a single instance of the
81309467b48Spatrick   /// modeled instruction.
81409467b48Spatrick   void generateInstruction(VPTransformState &State, unsigned Part);
81509467b48Spatrick 
81609467b48Spatrick protected:
setUnderlyingInstr(Instruction * I)81709467b48Spatrick   void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
81809467b48Spatrick 
81909467b48Spatrick public:
820*d415bd75Srobert   VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL,
821*d415bd75Srobert                 const Twine &Name = "")
VPRecipeBase(VPDef::VPInstructionSC,Operands)822*d415bd75Srobert       : VPRecipeBase(VPDef::VPInstructionSC, Operands), VPValue(this),
823*d415bd75Srobert         Opcode(Opcode), DL(DL), Name(Name.str()) {}
82473471bf0Spatrick 
825*d415bd75Srobert   VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
826*d415bd75Srobert                 DebugLoc DL = {}, const Twine &Name = "")
VPInstruction(Opcode,ArrayRef<VPValue * > (Operands),DL,Name)827*d415bd75Srobert       : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name) {}
82809467b48Spatrick 
VP_CLASSOF_IMPL(VPDef::VPInstructionSC)829*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
83009467b48Spatrick 
83109467b48Spatrick   VPInstruction *clone() const {
83209467b48Spatrick     SmallVector<VPValue *, 2> Operands(operands());
833*d415bd75Srobert     return new VPInstruction(Opcode, Operands, DL, Name);
83409467b48Spatrick   }
83509467b48Spatrick 
getOpcode()83609467b48Spatrick   unsigned getOpcode() const { return Opcode; }
83709467b48Spatrick 
83809467b48Spatrick   /// Generate the instruction.
83909467b48Spatrick   /// TODO: We currently execute only per-part unless a specific instance is
84009467b48Spatrick   /// provided.
84109467b48Spatrick   void execute(VPTransformState &State) override;
84209467b48Spatrick 
84373471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
84473471bf0Spatrick   /// Print the VPInstruction to \p O.
845097a140dSpatrick   void print(raw_ostream &O, const Twine &Indent,
846097a140dSpatrick              VPSlotTracker &SlotTracker) const override;
84709467b48Spatrick 
84873471bf0Spatrick   /// Print the VPInstruction to dbgs() (for debugging).
84973471bf0Spatrick   LLVM_DUMP_METHOD void dump() const;
85073471bf0Spatrick #endif
85109467b48Spatrick 
85209467b48Spatrick   /// Return true if this instruction may modify memory.
mayWriteToMemory()85309467b48Spatrick   bool mayWriteToMemory() const {
85409467b48Spatrick     // TODO: we can use attributes of the called function to rule out memory
85509467b48Spatrick     //       modifications.
85609467b48Spatrick     return Opcode == Instruction::Store || Opcode == Instruction::Call ||
85709467b48Spatrick            Opcode == Instruction::Invoke || Opcode == SLPStore;
85809467b48Spatrick   }
859097a140dSpatrick 
hasResult()860097a140dSpatrick   bool hasResult() const {
861097a140dSpatrick     // CallInst may or may not have a result, depending on the called function.
862097a140dSpatrick     // Conservatively return calls have results for now.
863097a140dSpatrick     switch (getOpcode()) {
864097a140dSpatrick     case Instruction::Ret:
865097a140dSpatrick     case Instruction::Br:
866097a140dSpatrick     case Instruction::Store:
867097a140dSpatrick     case Instruction::Switch:
868097a140dSpatrick     case Instruction::IndirectBr:
869097a140dSpatrick     case Instruction::Resume:
870097a140dSpatrick     case Instruction::CatchRet:
871097a140dSpatrick     case Instruction::Unreachable:
872097a140dSpatrick     case Instruction::Fence:
873097a140dSpatrick     case Instruction::AtomicRMW:
874*d415bd75Srobert     case VPInstruction::BranchOnCond:
875*d415bd75Srobert     case VPInstruction::BranchOnCount:
876097a140dSpatrick       return false;
877097a140dSpatrick     default:
878097a140dSpatrick       return true;
879097a140dSpatrick     }
880097a140dSpatrick   }
881*d415bd75Srobert 
882*d415bd75Srobert   /// Set the fast-math flags.
883*d415bd75Srobert   void setFastMathFlags(FastMathFlags FMFNew);
884*d415bd75Srobert 
885*d415bd75Srobert   /// Returns true if the recipe only uses the first lane of operand \p Op.
onlyFirstLaneUsed(const VPValue * Op)886*d415bd75Srobert   bool onlyFirstLaneUsed(const VPValue *Op) const override {
887*d415bd75Srobert     assert(is_contained(operands(), Op) &&
888*d415bd75Srobert            "Op must be an operand of the recipe");
889*d415bd75Srobert     if (getOperand(0) != Op)
890*d415bd75Srobert       return false;
891*d415bd75Srobert     switch (getOpcode()) {
892*d415bd75Srobert     default:
893*d415bd75Srobert       return false;
894*d415bd75Srobert     case VPInstruction::ActiveLaneMask:
895*d415bd75Srobert     case VPInstruction::CanonicalIVIncrement:
896*d415bd75Srobert     case VPInstruction::CanonicalIVIncrementNUW:
897*d415bd75Srobert     case VPInstruction::CanonicalIVIncrementForPart:
898*d415bd75Srobert     case VPInstruction::CanonicalIVIncrementForPartNUW:
899*d415bd75Srobert     case VPInstruction::BranchOnCount:
900*d415bd75Srobert       return true;
901*d415bd75Srobert     };
902*d415bd75Srobert     llvm_unreachable("switch should return");
903*d415bd75Srobert   }
90409467b48Spatrick };
90509467b48Spatrick 
906097a140dSpatrick /// VPWidenRecipe is a recipe for producing a copy of vector type its
907097a140dSpatrick /// ingredient. This recipe covers most of the traditional vectorization cases
908097a140dSpatrick /// where each ingredient transforms into a vectorized version of itself.
90973471bf0Spatrick class VPWidenRecipe : public VPRecipeBase, public VPValue {
91009467b48Spatrick public:
911097a140dSpatrick   template <typename IterT>
VPWidenRecipe(Instruction & I,iterator_range<IterT> Operands)912097a140dSpatrick   VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
913*d415bd75Srobert       : VPRecipeBase(VPDef::VPWidenSC, Operands), VPValue(this, &I) {}
91409467b48Spatrick 
91509467b48Spatrick   ~VPWidenRecipe() override = default;
91609467b48Spatrick 
917*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPWidenSC)
91809467b48Spatrick 
91909467b48Spatrick   /// Produce widened copies of all Ingredients.
92009467b48Spatrick   void execute(VPTransformState &State) override;
92109467b48Spatrick 
92273471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
923097a140dSpatrick   /// Print the recipe.
924097a140dSpatrick   void print(raw_ostream &O, const Twine &Indent,
925097a140dSpatrick              VPSlotTracker &SlotTracker) const override;
92673471bf0Spatrick #endif
927097a140dSpatrick };
928097a140dSpatrick 
929097a140dSpatrick /// A recipe for widening Call instructions.
93073471bf0Spatrick class VPWidenCallRecipe : public VPRecipeBase, public VPValue {
931*d415bd75Srobert   /// ID of the vector intrinsic to call when widening the call. If set the
932*d415bd75Srobert   /// Intrinsic::not_intrinsic, a library call will be used instead.
933*d415bd75Srobert   Intrinsic::ID VectorIntrinsicID;
934097a140dSpatrick 
935097a140dSpatrick public:
936097a140dSpatrick   template <typename IterT>
VPWidenCallRecipe(CallInst & I,iterator_range<IterT> CallArguments,Intrinsic::ID VectorIntrinsicID)937*d415bd75Srobert   VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments,
938*d415bd75Srobert                     Intrinsic::ID VectorIntrinsicID)
939*d415bd75Srobert       : VPRecipeBase(VPDef::VPWidenCallSC, CallArguments), VPValue(this, &I),
940*d415bd75Srobert         VectorIntrinsicID(VectorIntrinsicID) {}
941097a140dSpatrick 
942097a140dSpatrick   ~VPWidenCallRecipe() override = default;
943097a140dSpatrick 
944*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
94509467b48Spatrick 
946097a140dSpatrick   /// Produce a widened version of the call instruction.
947097a140dSpatrick   void execute(VPTransformState &State) override;
948097a140dSpatrick 
94973471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
95009467b48Spatrick   /// Print the recipe.
951097a140dSpatrick   void print(raw_ostream &O, const Twine &Indent,
952097a140dSpatrick              VPSlotTracker &SlotTracker) const override;
95373471bf0Spatrick #endif
954097a140dSpatrick };
955097a140dSpatrick 
956097a140dSpatrick /// A recipe for widening select instructions.
95773471bf0Spatrick class VPWidenSelectRecipe : public VPRecipeBase, public VPValue {
958097a140dSpatrick 
959097a140dSpatrick   /// Is the condition of the select loop invariant?
960097a140dSpatrick   bool InvariantCond;
961097a140dSpatrick 
962097a140dSpatrick public:
963097a140dSpatrick   template <typename IterT>
VPWidenSelectRecipe(SelectInst & I,iterator_range<IterT> Operands,bool InvariantCond)964097a140dSpatrick   VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands,
965097a140dSpatrick                       bool InvariantCond)
966*d415bd75Srobert       : VPRecipeBase(VPDef::VPWidenSelectSC, Operands), VPValue(this, &I),
967097a140dSpatrick         InvariantCond(InvariantCond) {}
968097a140dSpatrick 
969097a140dSpatrick   ~VPWidenSelectRecipe() override = default;
970097a140dSpatrick 
971*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
972097a140dSpatrick 
973097a140dSpatrick   /// Produce a widened version of the select instruction.
974097a140dSpatrick   void execute(VPTransformState &State) override;
975097a140dSpatrick 
97673471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
977097a140dSpatrick   /// Print the recipe.
978097a140dSpatrick   void print(raw_ostream &O, const Twine &Indent,
979097a140dSpatrick              VPSlotTracker &SlotTracker) const override;
98073471bf0Spatrick #endif
98109467b48Spatrick };
98209467b48Spatrick 
98309467b48Spatrick /// A recipe for handling GEP instructions.
98473471bf0Spatrick class VPWidenGEPRecipe : public VPRecipeBase, public VPValue {
98509467b48Spatrick   bool IsPtrLoopInvariant;
98609467b48Spatrick   SmallBitVector IsIndexLoopInvariant;
98709467b48Spatrick 
98809467b48Spatrick public:
989097a140dSpatrick   template <typename IterT>
VPWidenGEPRecipe(GetElementPtrInst * GEP,iterator_range<IterT> Operands)99073471bf0Spatrick   VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
991*d415bd75Srobert       : VPRecipeBase(VPDef::VPWidenGEPSC, Operands), VPValue(this, GEP),
99273471bf0Spatrick         IsIndexLoopInvariant(GEP->getNumIndices(), false) {}
99373471bf0Spatrick 
99473471bf0Spatrick   template <typename IterT>
VPWidenGEPRecipe(GetElementPtrInst * GEP,iterator_range<IterT> Operands,Loop * OrigLoop)995097a140dSpatrick   VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands,
996097a140dSpatrick                    Loop *OrigLoop)
997*d415bd75Srobert       : VPRecipeBase(VPDef::VPWidenGEPSC, Operands), VPValue(this, GEP),
99809467b48Spatrick         IsIndexLoopInvariant(GEP->getNumIndices(), false) {
99909467b48Spatrick     IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand());
100009467b48Spatrick     for (auto Index : enumerate(GEP->indices()))
100109467b48Spatrick       IsIndexLoopInvariant[Index.index()] =
100209467b48Spatrick           OrigLoop->isLoopInvariant(Index.value().get());
100309467b48Spatrick   }
100409467b48Spatrick   ~VPWidenGEPRecipe() override = default;
100509467b48Spatrick 
1006*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
100709467b48Spatrick 
100809467b48Spatrick   /// Generate the gep nodes.
100909467b48Spatrick   void execute(VPTransformState &State) override;
101009467b48Spatrick 
101173471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
101209467b48Spatrick   /// Print the recipe.
1013097a140dSpatrick   void print(raw_ostream &O, const Twine &Indent,
1014097a140dSpatrick              VPSlotTracker &SlotTracker) const override;
101573471bf0Spatrick #endif
101609467b48Spatrick };
101709467b48Spatrick 
101809467b48Spatrick /// A recipe for handling phi nodes of integer and floating-point inductions,
1019*d415bd75Srobert /// producing their vector values.
1020*d415bd75Srobert class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue {
102109467b48Spatrick   PHINode *IV;
1022*d415bd75Srobert   const InductionDescriptor &IndDesc;
1023*d415bd75Srobert   bool NeedsVectorIV;
102409467b48Spatrick 
102509467b48Spatrick public:
VPWidenIntOrFpInductionRecipe(PHINode * IV,VPValue * Start,VPValue * Step,const InductionDescriptor & IndDesc,bool NeedsVectorIV)1026*d415bd75Srobert   VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
1027*d415bd75Srobert                                 const InductionDescriptor &IndDesc,
1028*d415bd75Srobert                                 bool NeedsVectorIV)
1029*d415bd75Srobert       : VPRecipeBase(VPDef::VPWidenIntOrFpInductionSC, {Start, Step}),
1030*d415bd75Srobert         VPValue(this, IV), IV(IV), IndDesc(IndDesc),
1031*d415bd75Srobert         NeedsVectorIV(NeedsVectorIV) {}
103273471bf0Spatrick 
VPWidenIntOrFpInductionRecipe(PHINode * IV,VPValue * Start,VPValue * Step,const InductionDescriptor & IndDesc,TruncInst * Trunc,bool NeedsVectorIV)1033*d415bd75Srobert   VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
1034*d415bd75Srobert                                 const InductionDescriptor &IndDesc,
1035*d415bd75Srobert                                 TruncInst *Trunc, bool NeedsVectorIV)
1036*d415bd75Srobert       : VPRecipeBase(VPDef::VPWidenIntOrFpInductionSC, {Start, Step}),
1037*d415bd75Srobert         VPValue(this, Trunc), IV(IV), IndDesc(IndDesc),
1038*d415bd75Srobert         NeedsVectorIV(NeedsVectorIV) {}
1039*d415bd75Srobert 
104009467b48Spatrick   ~VPWidenIntOrFpInductionRecipe() override = default;
104109467b48Spatrick 
1042*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
104309467b48Spatrick 
104409467b48Spatrick   /// Generate the vectorized and scalarized versions of the phi node as
104509467b48Spatrick   /// needed by their users.
104609467b48Spatrick   void execute(VPTransformState &State) override;
104709467b48Spatrick 
104873471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
104909467b48Spatrick   /// Print the recipe.
1050097a140dSpatrick   void print(raw_ostream &O, const Twine &Indent,
1051097a140dSpatrick              VPSlotTracker &SlotTracker) const override;
105273471bf0Spatrick #endif
105373471bf0Spatrick 
105473471bf0Spatrick   /// Returns the start value of the induction.
getStartValue()105573471bf0Spatrick   VPValue *getStartValue() { return getOperand(0); }
getStartValue()1056*d415bd75Srobert   const VPValue *getStartValue() const { return getOperand(0); }
105773471bf0Spatrick 
1058*d415bd75Srobert   /// Returns the step value of the induction.
getStepValue()1059*d415bd75Srobert   VPValue *getStepValue() { return getOperand(1); }
getStepValue()1060*d415bd75Srobert   const VPValue *getStepValue() const { return getOperand(1); }
106173471bf0Spatrick 
106273471bf0Spatrick   /// Returns the first defined value as TruncInst, if it is one or nullptr
106373471bf0Spatrick   /// otherwise.
getTruncInst()106473471bf0Spatrick   TruncInst *getTruncInst() {
106573471bf0Spatrick     return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue());
106673471bf0Spatrick   }
getTruncInst()106773471bf0Spatrick   const TruncInst *getTruncInst() const {
106873471bf0Spatrick     return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue());
106973471bf0Spatrick   }
1070*d415bd75Srobert 
getPHINode()1071*d415bd75Srobert   PHINode *getPHINode() { return IV; }
1072*d415bd75Srobert 
1073*d415bd75Srobert   /// Returns the induction descriptor for the recipe.
getInductionDescriptor()1074*d415bd75Srobert   const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1075*d415bd75Srobert 
1076*d415bd75Srobert   /// Returns true if the induction is canonical, i.e. starting at 0 and
1077*d415bd75Srobert   /// incremented by UF * VF (= the original IV is incremented by 1).
1078*d415bd75Srobert   bool isCanonical() const;
1079*d415bd75Srobert 
1080*d415bd75Srobert   /// Returns the scalar type of the induction.
getScalarType()1081*d415bd75Srobert   const Type *getScalarType() const {
1082*d415bd75Srobert     const TruncInst *TruncI = getTruncInst();
1083*d415bd75Srobert     return TruncI ? TruncI->getType() : IV->getType();
1084*d415bd75Srobert   }
1085*d415bd75Srobert 
1086*d415bd75Srobert   /// Returns true if a vector phi needs to be created for the induction.
needsVectorIV()1087*d415bd75Srobert   bool needsVectorIV() const { return NeedsVectorIV; }
108809467b48Spatrick };
108909467b48Spatrick 
1090*d415bd75Srobert /// A pure virtual base class for all recipes modeling header phis, including
1091*d415bd75Srobert /// phis for first order recurrences, pointer inductions and reductions. The
1092*d415bd75Srobert /// start value is the first operand of the recipe and the incoming value from
1093*d415bd75Srobert /// the backedge is the second operand.
1094*d415bd75Srobert ///
1095*d415bd75Srobert /// Inductions are modeled using the following sub-classes:
1096*d415bd75Srobert ///  * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
1097*d415bd75Srobert ///    starting at a specified value (zero for the main vector loop, the resume
1098*d415bd75Srobert ///    value for the epilogue vector loop) and stepping by 1. The induction
1099*d415bd75Srobert ///    controls exiting of the vector loop by comparing against the vector trip
1100*d415bd75Srobert ///    count. Produces a single scalar PHI for the induction value per
1101*d415bd75Srobert ///    iteration.
1102*d415bd75Srobert ///  * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
1103*d415bd75Srobert ///    floating point inductions with arbitrary start and step values. Produces
1104*d415bd75Srobert ///    a vector PHI per-part.
1105*d415bd75Srobert ///  * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
1106*d415bd75Srobert ///    value of an IV with different start and step values. Produces a single
1107*d415bd75Srobert ///    scalar value per iteration
1108*d415bd75Srobert ///  * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
1109*d415bd75Srobert ///    canonical or derived induction.
1110*d415bd75Srobert ///  * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
1111*d415bd75Srobert ///    pointer induction. Produces either a vector PHI per-part or scalar values
1112*d415bd75Srobert ///    per-lane based on the canonical induction.
1113*d415bd75Srobert class VPHeaderPHIRecipe : public VPRecipeBase, public VPValue {
111473471bf0Spatrick protected:
1115*d415bd75Srobert   VPHeaderPHIRecipe(unsigned char VPDefID, PHINode *Phi,
111673471bf0Spatrick                     VPValue *Start = nullptr)
1117*d415bd75Srobert       : VPRecipeBase(VPDefID, {}), VPValue(this, Phi) {
111873471bf0Spatrick     if (Start)
111973471bf0Spatrick       addOperand(Start);
112073471bf0Spatrick   }
112109467b48Spatrick 
112209467b48Spatrick public:
1123*d415bd75Srobert   ~VPHeaderPHIRecipe() override = default;
112473471bf0Spatrick 
1125*d415bd75Srobert   /// Method to support type inquiry through isa, cast, and dyn_cast.
classof(const VPRecipeBase * B)1126*d415bd75Srobert   static inline bool classof(const VPRecipeBase *B) {
1127*d415bd75Srobert     return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
1128*d415bd75Srobert            B->getVPDefID() <= VPDef::VPLastPHISC;
1129*d415bd75Srobert   }
classof(const VPValue * V)1130*d415bd75Srobert   static inline bool classof(const VPValue *V) {
1131*d415bd75Srobert     auto *B = V->getDefiningRecipe();
1132*d415bd75Srobert     return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
1133*d415bd75Srobert            B->getVPDefID() <= VPRecipeBase::VPLastPHISC;
1134*d415bd75Srobert   }
1135*d415bd75Srobert 
1136*d415bd75Srobert   /// Generate the phi nodes.
1137*d415bd75Srobert   void execute(VPTransformState &State) override = 0;
1138*d415bd75Srobert 
1139*d415bd75Srobert #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1140*d415bd75Srobert   /// Print the recipe.
1141*d415bd75Srobert   void print(raw_ostream &O, const Twine &Indent,
1142*d415bd75Srobert              VPSlotTracker &SlotTracker) const override = 0;
1143*d415bd75Srobert #endif
1144*d415bd75Srobert 
1145*d415bd75Srobert   /// Returns the start value of the phi, if one is set.
getStartValue()1146*d415bd75Srobert   VPValue *getStartValue() {
1147*d415bd75Srobert     return getNumOperands() == 0 ? nullptr : getOperand(0);
1148*d415bd75Srobert   }
getStartValue()1149*d415bd75Srobert   VPValue *getStartValue() const {
1150*d415bd75Srobert     return getNumOperands() == 0 ? nullptr : getOperand(0);
1151*d415bd75Srobert   }
1152*d415bd75Srobert 
1153*d415bd75Srobert   /// Update the start value of the recipe.
setStartValue(VPValue * V)1154*d415bd75Srobert   void setStartValue(VPValue *V) { setOperand(0, V); }
1155*d415bd75Srobert 
1156*d415bd75Srobert   /// Returns the incoming value from the loop backedge.
getBackedgeValue()1157*d415bd75Srobert   VPValue *getBackedgeValue() {
1158*d415bd75Srobert     return getOperand(1);
1159*d415bd75Srobert   }
1160*d415bd75Srobert 
1161*d415bd75Srobert   /// Returns the backedge value as a recipe. The backedge value is guaranteed
1162*d415bd75Srobert   /// to be a recipe.
getBackedgeRecipe()1163*d415bd75Srobert   VPRecipeBase &getBackedgeRecipe() {
1164*d415bd75Srobert     return *getBackedgeValue()->getDefiningRecipe();
1165*d415bd75Srobert   }
1166*d415bd75Srobert };
1167*d415bd75Srobert 
1168*d415bd75Srobert class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
1169*d415bd75Srobert   const InductionDescriptor &IndDesc;
1170*d415bd75Srobert 
1171*d415bd75Srobert   bool IsScalarAfterVectorization;
1172*d415bd75Srobert 
1173*d415bd75Srobert public:
1174*d415bd75Srobert   /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
1175*d415bd75Srobert   /// Start.
VPWidenPointerInductionRecipe(PHINode * Phi,VPValue * Start,VPValue * Step,const InductionDescriptor & IndDesc,bool IsScalarAfterVectorization)1176*d415bd75Srobert   VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
1177*d415bd75Srobert                                 const InductionDescriptor &IndDesc,
1178*d415bd75Srobert                                 bool IsScalarAfterVectorization)
1179*d415bd75Srobert       : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
1180*d415bd75Srobert         IndDesc(IndDesc),
1181*d415bd75Srobert         IsScalarAfterVectorization(IsScalarAfterVectorization) {
1182*d415bd75Srobert     addOperand(Start);
1183*d415bd75Srobert     addOperand(Step);
1184*d415bd75Srobert   }
1185*d415bd75Srobert 
1186*d415bd75Srobert   ~VPWidenPointerInductionRecipe() override = default;
1187*d415bd75Srobert 
1188*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
1189*d415bd75Srobert 
1190*d415bd75Srobert   /// Generate vector values for the pointer induction.
1191*d415bd75Srobert   void execute(VPTransformState &State) override;
1192*d415bd75Srobert 
1193*d415bd75Srobert   /// Returns true if only scalar values will be generated.
1194*d415bd75Srobert   bool onlyScalarsGenerated(ElementCount VF);
1195*d415bd75Srobert 
1196*d415bd75Srobert   /// Returns the induction descriptor for the recipe.
getInductionDescriptor()1197*d415bd75Srobert   const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1198*d415bd75Srobert 
1199*d415bd75Srobert #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1200*d415bd75Srobert   /// Print the recipe.
1201*d415bd75Srobert   void print(raw_ostream &O, const Twine &Indent,
1202*d415bd75Srobert              VPSlotTracker &SlotTracker) const override;
1203*d415bd75Srobert #endif
1204*d415bd75Srobert };
1205*d415bd75Srobert 
1206*d415bd75Srobert /// A recipe for handling header phis that are widened in the vector loop.
1207*d415bd75Srobert /// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
1208*d415bd75Srobert /// managed in the recipe directly.
1209*d415bd75Srobert class VPWidenPHIRecipe : public VPHeaderPHIRecipe {
1210*d415bd75Srobert   /// List of incoming blocks. Only used in the VPlan native path.
1211*d415bd75Srobert   SmallVector<VPBasicBlock *, 2> IncomingBlocks;
1212*d415bd75Srobert 
1213*d415bd75Srobert public:
121473471bf0Spatrick   /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
1215*d415bd75Srobert   VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
VPHeaderPHIRecipe(VPDef::VPWidenPHISC,Phi)1216*d415bd75Srobert       : VPHeaderPHIRecipe(VPDef::VPWidenPHISC, Phi) {
1217*d415bd75Srobert     if (Start)
1218*d415bd75Srobert       addOperand(Start);
121973471bf0Spatrick   }
122073471bf0Spatrick 
122109467b48Spatrick   ~VPWidenPHIRecipe() override = default;
122209467b48Spatrick 
1223*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
122409467b48Spatrick 
122509467b48Spatrick   /// Generate the phi/select nodes.
122609467b48Spatrick   void execute(VPTransformState &State) override;
122709467b48Spatrick 
122873471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
122909467b48Spatrick   /// Print the recipe.
1230097a140dSpatrick   void print(raw_ostream &O, const Twine &Indent,
1231097a140dSpatrick              VPSlotTracker &SlotTracker) const override;
123273471bf0Spatrick #endif
123373471bf0Spatrick 
123473471bf0Spatrick   /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
addIncoming(VPValue * IncomingV,VPBasicBlock * IncomingBlock)123573471bf0Spatrick   void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
123673471bf0Spatrick     addOperand(IncomingV);
123773471bf0Spatrick     IncomingBlocks.push_back(IncomingBlock);
123873471bf0Spatrick   }
123973471bf0Spatrick 
124073471bf0Spatrick   /// Returns the \p I th incoming VPBasicBlock.
getIncomingBlock(unsigned I)124173471bf0Spatrick   VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
1242*d415bd75Srobert 
1243*d415bd75Srobert   /// Returns the \p I th incoming VPValue.
getIncomingValue(unsigned I)1244*d415bd75Srobert   VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
124573471bf0Spatrick };
124673471bf0Spatrick 
124773471bf0Spatrick /// A recipe for handling first-order recurrence phis. The start value is the
124873471bf0Spatrick /// first operand of the recipe and the incoming value from the backedge is the
124973471bf0Spatrick /// second operand.
1250*d415bd75Srobert struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
VPFirstOrderRecurrencePHIRecipeVPFirstOrderRecurrencePHIRecipe125173471bf0Spatrick   VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
1252*d415bd75Srobert       : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
125373471bf0Spatrick 
VP_CLASSOF_IMPLVPFirstOrderRecurrencePHIRecipe1254*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
1255*d415bd75Srobert 
1256*d415bd75Srobert   static inline bool classof(const VPHeaderPHIRecipe *R) {
1257*d415bd75Srobert     return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
125873471bf0Spatrick   }
125973471bf0Spatrick 
126073471bf0Spatrick   void execute(VPTransformState &State) override;
126173471bf0Spatrick 
126273471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
126373471bf0Spatrick   /// Print the recipe.
126473471bf0Spatrick   void print(raw_ostream &O, const Twine &Indent,
126573471bf0Spatrick              VPSlotTracker &SlotTracker) const override;
126673471bf0Spatrick #endif
126773471bf0Spatrick };
126873471bf0Spatrick 
126973471bf0Spatrick /// A recipe for handling reduction phis. The start value is the first operand
127073471bf0Spatrick /// of the recipe and the incoming value from the backedge is the second
127173471bf0Spatrick /// operand.
1272*d415bd75Srobert class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
127373471bf0Spatrick   /// Descriptor for the reduction.
1274*d415bd75Srobert   const RecurrenceDescriptor &RdxDesc;
127573471bf0Spatrick 
127673471bf0Spatrick   /// The phi is part of an in-loop reduction.
127773471bf0Spatrick   bool IsInLoop;
127873471bf0Spatrick 
127973471bf0Spatrick   /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
128073471bf0Spatrick   bool IsOrdered;
128173471bf0Spatrick 
128273471bf0Spatrick public:
128373471bf0Spatrick   /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
128473471bf0Spatrick   /// RdxDesc.
1285*d415bd75Srobert   VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
128673471bf0Spatrick                        VPValue &Start, bool IsInLoop = false,
128773471bf0Spatrick                        bool IsOrdered = false)
1288*d415bd75Srobert       : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
128973471bf0Spatrick         RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
129073471bf0Spatrick     assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
129173471bf0Spatrick   }
129273471bf0Spatrick 
129373471bf0Spatrick   ~VPReductionPHIRecipe() override = default;
129473471bf0Spatrick 
VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)1295*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
1296*d415bd75Srobert 
1297*d415bd75Srobert   static inline bool classof(const VPHeaderPHIRecipe *R) {
1298*d415bd75Srobert     return R->getVPDefID() == VPDef::VPReductionPHISC;
129973471bf0Spatrick   }
130073471bf0Spatrick 
130173471bf0Spatrick   /// Generate the phi/select nodes.
130273471bf0Spatrick   void execute(VPTransformState &State) override;
130373471bf0Spatrick 
130473471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
130573471bf0Spatrick   /// Print the recipe.
130673471bf0Spatrick   void print(raw_ostream &O, const Twine &Indent,
130773471bf0Spatrick              VPSlotTracker &SlotTracker) const override;
130873471bf0Spatrick #endif
130973471bf0Spatrick 
getRecurrenceDescriptor()1310*d415bd75Srobert   const RecurrenceDescriptor &getRecurrenceDescriptor() const {
1311*d415bd75Srobert     return RdxDesc;
1312*d415bd75Srobert   }
131373471bf0Spatrick 
131473471bf0Spatrick   /// Returns true, if the phi is part of an ordered reduction.
isOrdered()131573471bf0Spatrick   bool isOrdered() const { return IsOrdered; }
131673471bf0Spatrick 
131773471bf0Spatrick   /// Returns true, if the phi is part of an in-loop reduction.
isInLoop()131873471bf0Spatrick   bool isInLoop() const { return IsInLoop; }
131909467b48Spatrick };
132009467b48Spatrick 
132109467b48Spatrick /// A recipe for vectorizing a phi-node as a sequence of mask-based select
132209467b48Spatrick /// instructions.
132373471bf0Spatrick class VPBlendRecipe : public VPRecipeBase, public VPValue {
132409467b48Spatrick   PHINode *Phi;
132509467b48Spatrick 
132673471bf0Spatrick public:
1327097a140dSpatrick   /// The blend operation is a User of the incoming values and of their
1328097a140dSpatrick   /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
1329097a140dSpatrick   /// might be incoming with a full mask for which there is no VPValue.
VPBlendRecipe(PHINode * Phi,ArrayRef<VPValue * > Operands)1330097a140dSpatrick   VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
1331*d415bd75Srobert       : VPRecipeBase(VPDef::VPBlendSC, Operands), VPValue(this, Phi), Phi(Phi) {
1332097a140dSpatrick     assert(Operands.size() > 0 &&
1333097a140dSpatrick            ((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
1334097a140dSpatrick            "Expected either a single incoming value or a positive even number "
1335097a140dSpatrick            "of operands");
133609467b48Spatrick   }
133709467b48Spatrick 
VP_CLASSOF_IMPL(VPDef::VPBlendSC)1338*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPBlendSC)
133909467b48Spatrick 
1340097a140dSpatrick   /// Return the number of incoming values, taking into account that a single
1341097a140dSpatrick   /// incoming value has no mask.
134273471bf0Spatrick   unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }
1343097a140dSpatrick 
1344097a140dSpatrick   /// Return incoming value number \p Idx.
getIncomingValue(unsigned Idx)134573471bf0Spatrick   VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); }
1346097a140dSpatrick 
1347097a140dSpatrick   /// Return mask number \p Idx.
getMask(unsigned Idx)134873471bf0Spatrick   VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); }
1349097a140dSpatrick 
135009467b48Spatrick   /// Generate the phi/select nodes.
135109467b48Spatrick   void execute(VPTransformState &State) override;
135209467b48Spatrick 
135373471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
135409467b48Spatrick   /// Print the recipe.
1355097a140dSpatrick   void print(raw_ostream &O, const Twine &Indent,
1356097a140dSpatrick              VPSlotTracker &SlotTracker) const override;
135773471bf0Spatrick #endif
1358*d415bd75Srobert 
1359*d415bd75Srobert   /// Returns true if the recipe only uses the first lane of operand \p Op.
onlyFirstLaneUsed(const VPValue * Op)1360*d415bd75Srobert   bool onlyFirstLaneUsed(const VPValue *Op) const override {
1361*d415bd75Srobert     assert(is_contained(operands(), Op) &&
1362*d415bd75Srobert            "Op must be an operand of the recipe");
1363*d415bd75Srobert     // Recursing through Blend recipes only, must terminate at header phi's the
1364*d415bd75Srobert     // latest.
1365*d415bd75Srobert     return all_of(users(),
1366*d415bd75Srobert                   [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
1367*d415bd75Srobert   }
136809467b48Spatrick };
136909467b48Spatrick 
137009467b48Spatrick /// VPInterleaveRecipe is a recipe for transforming an interleave group of load
137173471bf0Spatrick /// or stores into one wide load/store and shuffles. The first operand of a
137273471bf0Spatrick /// VPInterleave recipe is the address, followed by the stored values, followed
137373471bf0Spatrick /// by an optional mask.
137409467b48Spatrick class VPInterleaveRecipe : public VPRecipeBase {
137509467b48Spatrick   const InterleaveGroup<Instruction> *IG;
137673471bf0Spatrick 
137773471bf0Spatrick   bool HasMask = false;
137809467b48Spatrick 
137909467b48Spatrick public:
VPInterleaveRecipe(const InterleaveGroup<Instruction> * IG,VPValue * Addr,ArrayRef<VPValue * > StoredValues,VPValue * Mask)138009467b48Spatrick   VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
138173471bf0Spatrick                      ArrayRef<VPValue *> StoredValues, VPValue *Mask)
1382*d415bd75Srobert       : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG) {
138373471bf0Spatrick     for (unsigned i = 0; i < IG->getFactor(); ++i)
138473471bf0Spatrick       if (Instruction *I = IG->getMember(i)) {
138573471bf0Spatrick         if (I->getType()->isVoidTy())
138673471bf0Spatrick           continue;
138773471bf0Spatrick         new VPValue(I, this);
138873471bf0Spatrick       }
138973471bf0Spatrick 
139073471bf0Spatrick     for (auto *SV : StoredValues)
139173471bf0Spatrick       addOperand(SV);
139273471bf0Spatrick     if (Mask) {
139373471bf0Spatrick       HasMask = true;
139473471bf0Spatrick       addOperand(Mask);
139573471bf0Spatrick     }
139609467b48Spatrick   }
139709467b48Spatrick   ~VPInterleaveRecipe() override = default;
139809467b48Spatrick 
VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)1399*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
140009467b48Spatrick 
140109467b48Spatrick   /// Return the address accessed by this recipe.
140209467b48Spatrick   VPValue *getAddr() const {
140373471bf0Spatrick     return getOperand(0); // Address is the 1st, mandatory operand.
140409467b48Spatrick   }
140509467b48Spatrick 
140609467b48Spatrick   /// Return the mask used by this recipe. Note that a full mask is represented
140709467b48Spatrick   /// by a nullptr.
getMask()140809467b48Spatrick   VPValue *getMask() const {
140909467b48Spatrick     // Mask is optional and therefore the last, currently 2nd operand.
141073471bf0Spatrick     return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
141173471bf0Spatrick   }
141273471bf0Spatrick 
141373471bf0Spatrick   /// Return the VPValues stored by this interleave group. If it is a load
141473471bf0Spatrick   /// interleave group, return an empty ArrayRef.
getStoredValues()141573471bf0Spatrick   ArrayRef<VPValue *> getStoredValues() const {
141673471bf0Spatrick     // The first operand is the address, followed by the stored values, followed
141773471bf0Spatrick     // by an optional mask.
141873471bf0Spatrick     return ArrayRef<VPValue *>(op_begin(), getNumOperands())
1419*d415bd75Srobert         .slice(1, getNumStoreOperands());
142009467b48Spatrick   }
142109467b48Spatrick 
142209467b48Spatrick   /// Generate the wide load or store, and shuffles.
142309467b48Spatrick   void execute(VPTransformState &State) override;
142409467b48Spatrick 
142573471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
142609467b48Spatrick   /// Print the recipe.
1427097a140dSpatrick   void print(raw_ostream &O, const Twine &Indent,
1428097a140dSpatrick              VPSlotTracker &SlotTracker) const override;
142973471bf0Spatrick #endif
143009467b48Spatrick 
getInterleaveGroup()143109467b48Spatrick   const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
1432*d415bd75Srobert 
1433*d415bd75Srobert   /// Returns the number of stored operands of this interleave group. Returns 0
1434*d415bd75Srobert   /// for load interleave groups.
getNumStoreOperands()1435*d415bd75Srobert   unsigned getNumStoreOperands() const {
1436*d415bd75Srobert     return getNumOperands() - (HasMask ? 2 : 1);
1437*d415bd75Srobert   }
1438*d415bd75Srobert 
1439*d415bd75Srobert   /// The recipe only uses the first lane of the address.
onlyFirstLaneUsed(const VPValue * Op)1440*d415bd75Srobert   bool onlyFirstLaneUsed(const VPValue *Op) const override {
1441*d415bd75Srobert     assert(is_contained(operands(), Op) &&
1442*d415bd75Srobert            "Op must be an operand of the recipe");
1443*d415bd75Srobert     return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
1444*d415bd75Srobert   }
144509467b48Spatrick };
144609467b48Spatrick 
144773471bf0Spatrick /// A recipe to represent inloop reduction operations, performing a reduction on
144873471bf0Spatrick /// a vector operand into a scalar value, and adding the result to a chain.
144973471bf0Spatrick /// The Operands are {ChainOp, VecOp, [Condition]}.
145073471bf0Spatrick class VPReductionRecipe : public VPRecipeBase, public VPValue {
145173471bf0Spatrick   /// The recurrence decriptor for the reduction in question.
1452*d415bd75Srobert   const RecurrenceDescriptor *RdxDesc;
145373471bf0Spatrick   /// Pointer to the TTI, needed to create the target reduction
145473471bf0Spatrick   const TargetTransformInfo *TTI;
145573471bf0Spatrick 
145673471bf0Spatrick public:
VPReductionRecipe(const RecurrenceDescriptor * R,Instruction * I,VPValue * ChainOp,VPValue * VecOp,VPValue * CondOp,const TargetTransformInfo * TTI)1457*d415bd75Srobert   VPReductionRecipe(const RecurrenceDescriptor *R, Instruction *I,
1458*d415bd75Srobert                     VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
145973471bf0Spatrick                     const TargetTransformInfo *TTI)
1460*d415bd75Srobert       : VPRecipeBase(VPDef::VPReductionSC, {ChainOp, VecOp}), VPValue(this, I),
1461*d415bd75Srobert         RdxDesc(R), TTI(TTI) {
146273471bf0Spatrick     if (CondOp)
146373471bf0Spatrick       addOperand(CondOp);
146473471bf0Spatrick   }
146573471bf0Spatrick 
146673471bf0Spatrick   ~VPReductionRecipe() override = default;
146773471bf0Spatrick 
1468*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPReductionSC)
146973471bf0Spatrick 
147073471bf0Spatrick   /// Generate the reduction in the loop
147173471bf0Spatrick   void execute(VPTransformState &State) override;
147273471bf0Spatrick 
147373471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
147473471bf0Spatrick   /// Print the recipe.
147573471bf0Spatrick   void print(raw_ostream &O, const Twine &Indent,
147673471bf0Spatrick              VPSlotTracker &SlotTracker) const override;
147773471bf0Spatrick #endif
147873471bf0Spatrick 
147973471bf0Spatrick   /// The VPValue of the scalar Chain being accumulated.
getChainOp()148073471bf0Spatrick   VPValue *getChainOp() const { return getOperand(0); }
148173471bf0Spatrick   /// The VPValue of the vector value to be reduced.
getVecOp()148273471bf0Spatrick   VPValue *getVecOp() const { return getOperand(1); }
148373471bf0Spatrick   /// The VPValue of the condition for the block.
getCondOp()148473471bf0Spatrick   VPValue *getCondOp() const {
148573471bf0Spatrick     return getNumOperands() > 2 ? getOperand(2) : nullptr;
148673471bf0Spatrick   }
148773471bf0Spatrick };
148873471bf0Spatrick 
148909467b48Spatrick /// VPReplicateRecipe replicates a given instruction producing multiple scalar
149009467b48Spatrick /// copies of the original scalar type, one per lane, instead of producing a
149109467b48Spatrick /// single copy of widened type for all lanes. If the instruction is known to be
149209467b48Spatrick /// uniform only one copy, per lane zero, will be generated.
149373471bf0Spatrick class VPReplicateRecipe : public VPRecipeBase, public VPValue {
149409467b48Spatrick   /// Indicator if only a single replica per lane is needed.
149509467b48Spatrick   bool IsUniform;
149609467b48Spatrick 
149709467b48Spatrick   /// Indicator if the replicas are also predicated.
149809467b48Spatrick   bool IsPredicated;
149909467b48Spatrick 
150009467b48Spatrick   /// Indicator if the scalar values should also be packed into a vector.
150109467b48Spatrick   bool AlsoPack;
150209467b48Spatrick 
150309467b48Spatrick public:
1504097a140dSpatrick   template <typename IterT>
1505097a140dSpatrick   VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
1506097a140dSpatrick                     bool IsUniform, bool IsPredicated = false)
VPRecipeBase(VPDef::VPReplicateSC,Operands)1507*d415bd75Srobert       : VPRecipeBase(VPDef::VPReplicateSC, Operands), VPValue(this, I),
1508097a140dSpatrick         IsUniform(IsUniform), IsPredicated(IsPredicated) {
150909467b48Spatrick     // Retain the previous behavior of predicateInstructions(), where an
151009467b48Spatrick     // insert-element of a predicated instruction got hoisted into the
151109467b48Spatrick     // predicated basic block iff it was its only user. This is achieved by
151209467b48Spatrick     // having predicated instructions also pack their values into a vector by
151309467b48Spatrick     // default unless they have a replicated user which uses their scalar value.
151409467b48Spatrick     AlsoPack = IsPredicated && !I->use_empty();
151509467b48Spatrick   }
151609467b48Spatrick 
151709467b48Spatrick   ~VPReplicateRecipe() override = default;
151809467b48Spatrick 
1519*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
152009467b48Spatrick 
152109467b48Spatrick   /// Generate replicas of the desired Ingredient. Replicas will be generated
152209467b48Spatrick   /// for all parts and lanes unless a specific part and lane are specified in
152309467b48Spatrick   /// the \p State.
152409467b48Spatrick   void execute(VPTransformState &State) override;
152509467b48Spatrick 
setAlsoPack(bool Pack)152609467b48Spatrick   void setAlsoPack(bool Pack) { AlsoPack = Pack; }
152709467b48Spatrick 
152873471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
152909467b48Spatrick   /// Print the recipe.
1530097a140dSpatrick   void print(raw_ostream &O, const Twine &Indent,
1531097a140dSpatrick              VPSlotTracker &SlotTracker) const override;
153273471bf0Spatrick #endif
153373471bf0Spatrick 
isUniform()153473471bf0Spatrick   bool isUniform() const { return IsUniform; }
153573471bf0Spatrick 
isPacked()153673471bf0Spatrick   bool isPacked() const { return AlsoPack; }
153773471bf0Spatrick 
isPredicated()153873471bf0Spatrick   bool isPredicated() const { return IsPredicated; }
1539*d415bd75Srobert 
1540*d415bd75Srobert   /// Returns true if the recipe only uses the first lane of operand \p Op.
onlyFirstLaneUsed(const VPValue * Op)1541*d415bd75Srobert   bool onlyFirstLaneUsed(const VPValue *Op) const override {
1542*d415bd75Srobert     assert(is_contained(operands(), Op) &&
1543*d415bd75Srobert            "Op must be an operand of the recipe");
1544*d415bd75Srobert     return isUniform();
1545*d415bd75Srobert   }
1546*d415bd75Srobert 
1547*d415bd75Srobert   /// Returns true if the recipe uses scalars of operand \p Op.
usesScalars(const VPValue * Op)1548*d415bd75Srobert   bool usesScalars(const VPValue *Op) const override {
1549*d415bd75Srobert     assert(is_contained(operands(), Op) &&
1550*d415bd75Srobert            "Op must be an operand of the recipe");
1551*d415bd75Srobert     return true;
1552*d415bd75Srobert   }
155309467b48Spatrick };
155409467b48Spatrick 
155509467b48Spatrick /// A recipe for generating conditional branches on the bits of a mask.
155609467b48Spatrick class VPBranchOnMaskRecipe : public VPRecipeBase {
155709467b48Spatrick public:
VPBranchOnMaskRecipe(VPValue * BlockInMask)155873471bf0Spatrick   VPBranchOnMaskRecipe(VPValue *BlockInMask)
1559*d415bd75Srobert       : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
156009467b48Spatrick     if (BlockInMask) // nullptr means all-one mask.
156173471bf0Spatrick       addOperand(BlockInMask);
156209467b48Spatrick   }
156309467b48Spatrick 
1564*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
156509467b48Spatrick 
156609467b48Spatrick   /// Generate the extraction of the appropriate bit from the block mask and the
156709467b48Spatrick   /// conditional branch.
156809467b48Spatrick   void execute(VPTransformState &State) override;
156909467b48Spatrick 
157073471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
157109467b48Spatrick   /// Print the recipe.
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker)1572097a140dSpatrick   void print(raw_ostream &O, const Twine &Indent,
1573097a140dSpatrick              VPSlotTracker &SlotTracker) const override {
157473471bf0Spatrick     O << Indent << "BRANCH-ON-MASK ";
1575097a140dSpatrick     if (VPValue *Mask = getMask())
157673471bf0Spatrick       Mask->printAsOperand(O, SlotTracker);
157709467b48Spatrick     else
157809467b48Spatrick       O << " All-One";
157909467b48Spatrick   }
158073471bf0Spatrick #endif
1581097a140dSpatrick 
1582097a140dSpatrick   /// Return the mask used by this recipe. Note that a full mask is represented
1583097a140dSpatrick   /// by a nullptr.
getMask()1584097a140dSpatrick   VPValue *getMask() const {
158573471bf0Spatrick     assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
1586097a140dSpatrick     // Mask is optional.
158773471bf0Spatrick     return getNumOperands() == 1 ? getOperand(0) : nullptr;
1588097a140dSpatrick   }
1589*d415bd75Srobert 
1590*d415bd75Srobert   /// Returns true if the recipe uses scalars of operand \p Op.
usesScalars(const VPValue * Op)1591*d415bd75Srobert   bool usesScalars(const VPValue *Op) const override {
1592*d415bd75Srobert     assert(is_contained(operands(), Op) &&
1593*d415bd75Srobert            "Op must be an operand of the recipe");
1594*d415bd75Srobert     return true;
1595*d415bd75Srobert   }
159609467b48Spatrick };
159709467b48Spatrick 
159809467b48Spatrick /// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
159909467b48Spatrick /// control converges back from a Branch-on-Mask. The phi nodes are needed in
160009467b48Spatrick /// order to merge values that are set under such a branch and feed their uses.
160109467b48Spatrick /// The phi nodes can be scalar or vector depending on the users of the value.
160209467b48Spatrick /// This recipe works in concert with VPBranchOnMaskRecipe.
160373471bf0Spatrick class VPPredInstPHIRecipe : public VPRecipeBase, public VPValue {
160409467b48Spatrick public:
160509467b48Spatrick   /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
160609467b48Spatrick   /// nodes after merging back from a Branch-on-Mask.
VPPredInstPHIRecipe(VPValue * PredV)160773471bf0Spatrick   VPPredInstPHIRecipe(VPValue *PredV)
1608*d415bd75Srobert       : VPRecipeBase(VPDef::VPPredInstPHISC, PredV), VPValue(this) {}
160909467b48Spatrick   ~VPPredInstPHIRecipe() override = default;
161009467b48Spatrick 
1611*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
161209467b48Spatrick 
161309467b48Spatrick   /// Generates phi nodes for live-outs as needed to retain SSA form.
161409467b48Spatrick   void execute(VPTransformState &State) override;
161509467b48Spatrick 
161673471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
161709467b48Spatrick   /// Print the recipe.
1618097a140dSpatrick   void print(raw_ostream &O, const Twine &Indent,
1619097a140dSpatrick              VPSlotTracker &SlotTracker) const override;
162073471bf0Spatrick #endif
1621*d415bd75Srobert 
1622*d415bd75Srobert   /// Returns true if the recipe uses scalars of operand \p Op.
usesScalars(const VPValue * Op)1623*d415bd75Srobert   bool usesScalars(const VPValue *Op) const override {
1624*d415bd75Srobert     assert(is_contained(operands(), Op) &&
1625*d415bd75Srobert            "Op must be an operand of the recipe");
1626*d415bd75Srobert     return true;
1627*d415bd75Srobert   }
162809467b48Spatrick };
162909467b48Spatrick 
163009467b48Spatrick /// A Recipe for widening load/store operations.
1631097a140dSpatrick /// The recipe uses the following VPValues:
1632097a140dSpatrick /// - For load: Address, optional mask
1633097a140dSpatrick /// - For store: Address, stored value, optional mask
163409467b48Spatrick /// TODO: We currently execute only per-part unless a specific instance is
163509467b48Spatrick /// provided.
163609467b48Spatrick class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
163773471bf0Spatrick   Instruction &Ingredient;
163809467b48Spatrick 
1639*d415bd75Srobert   // Whether the loaded-from / stored-to addresses are consecutive.
1640*d415bd75Srobert   bool Consecutive;
1641*d415bd75Srobert 
1642*d415bd75Srobert   // Whether the consecutive loaded/stored addresses are in reverse order.
1643*d415bd75Srobert   bool Reverse;
1644*d415bd75Srobert 
setMask(VPValue * Mask)1645097a140dSpatrick   void setMask(VPValue *Mask) {
1646097a140dSpatrick     if (!Mask)
1647097a140dSpatrick       return;
164873471bf0Spatrick     addOperand(Mask);
164909467b48Spatrick   }
165009467b48Spatrick 
isMasked()1651097a140dSpatrick   bool isMasked() const {
165273471bf0Spatrick     return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
1653097a140dSpatrick   }
1654097a140dSpatrick 
1655097a140dSpatrick public:
VPWidenMemoryInstructionRecipe(LoadInst & Load,VPValue * Addr,VPValue * Mask,bool Consecutive,bool Reverse)1656*d415bd75Srobert   VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
1657*d415bd75Srobert                                  bool Consecutive, bool Reverse)
1658*d415bd75Srobert       : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}),
1659*d415bd75Srobert         Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) {
1660*d415bd75Srobert     assert((Consecutive || !Reverse) && "Reverse implies consecutive");
1661*d415bd75Srobert     new VPValue(this, &Load);
1662097a140dSpatrick     setMask(Mask);
1663097a140dSpatrick   }
1664097a140dSpatrick 
VPWidenMemoryInstructionRecipe(StoreInst & Store,VPValue * Addr,VPValue * StoredValue,VPValue * Mask,bool Consecutive,bool Reverse)1665097a140dSpatrick   VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
1666*d415bd75Srobert                                  VPValue *StoredValue, VPValue *Mask,
1667*d415bd75Srobert                                  bool Consecutive, bool Reverse)
1668*d415bd75Srobert       : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue}),
1669*d415bd75Srobert         Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
1670*d415bd75Srobert     assert((Consecutive || !Reverse) && "Reverse implies consecutive");
1671097a140dSpatrick     setMask(Mask);
1672097a140dSpatrick   }
1673097a140dSpatrick 
VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC)1674*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC)
167509467b48Spatrick 
167609467b48Spatrick   /// Return the address accessed by this recipe.
167709467b48Spatrick   VPValue *getAddr() const {
167873471bf0Spatrick     return getOperand(0); // Address is the 1st, mandatory operand.
167909467b48Spatrick   }
168009467b48Spatrick 
168109467b48Spatrick   /// Return the mask used by this recipe. Note that a full mask is represented
168209467b48Spatrick   /// by a nullptr.
getMask()168309467b48Spatrick   VPValue *getMask() const {
1684097a140dSpatrick     // Mask is optional and therefore the last operand.
168573471bf0Spatrick     return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
1686097a140dSpatrick   }
1687097a140dSpatrick 
168873471bf0Spatrick   /// Returns true if this recipe is a store.
isStore()168973471bf0Spatrick   bool isStore() const { return isa<StoreInst>(Ingredient); }
169073471bf0Spatrick 
1691097a140dSpatrick   /// Return the address accessed by this recipe.
getStoredValue()1692097a140dSpatrick   VPValue *getStoredValue() const {
169373471bf0Spatrick     assert(isStore() && "Stored value only available for store instructions");
169473471bf0Spatrick     return getOperand(1); // Stored value is the 2nd, mandatory operand.
169509467b48Spatrick   }
169609467b48Spatrick 
1697*d415bd75Srobert   // Return whether the loaded-from / stored-to addresses are consecutive.
isConsecutive()1698*d415bd75Srobert   bool isConsecutive() const { return Consecutive; }
1699*d415bd75Srobert 
1700*d415bd75Srobert   // Return whether the consecutive loaded/stored addresses are in reverse
1701*d415bd75Srobert   // order.
isReverse()1702*d415bd75Srobert   bool isReverse() const { return Reverse; }
1703*d415bd75Srobert 
170409467b48Spatrick   /// Generate the wide load/store.
170509467b48Spatrick   void execute(VPTransformState &State) override;
170609467b48Spatrick 
170773471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
170809467b48Spatrick   /// Print the recipe.
1709097a140dSpatrick   void print(raw_ostream &O, const Twine &Indent,
1710097a140dSpatrick              VPSlotTracker &SlotTracker) const override;
171173471bf0Spatrick #endif
1712*d415bd75Srobert 
1713*d415bd75Srobert   /// Returns true if the recipe only uses the first lane of operand \p Op.
onlyFirstLaneUsed(const VPValue * Op)1714*d415bd75Srobert   bool onlyFirstLaneUsed(const VPValue *Op) const override {
1715*d415bd75Srobert     assert(is_contained(operands(), Op) &&
1716*d415bd75Srobert            "Op must be an operand of the recipe");
1717*d415bd75Srobert 
1718*d415bd75Srobert     // Widened, consecutive memory operations only demand the first lane of
1719*d415bd75Srobert     // their address, unless the same operand is also stored. That latter can
1720*d415bd75Srobert     // happen with opaque pointers.
1721*d415bd75Srobert     return Op == getAddr() && isConsecutive() &&
1722*d415bd75Srobert            (!isStore() || Op != getStoredValue());
1723*d415bd75Srobert   }
1724*d415bd75Srobert 
getIngredient()1725*d415bd75Srobert   Instruction &getIngredient() const { return Ingredient; }
1726*d415bd75Srobert };
1727*d415bd75Srobert 
1728*d415bd75Srobert /// Recipe to expand a SCEV expression.
1729*d415bd75Srobert class VPExpandSCEVRecipe : public VPRecipeBase, public VPValue {
1730*d415bd75Srobert   const SCEV *Expr;
1731*d415bd75Srobert   ScalarEvolution &SE;
1732*d415bd75Srobert 
1733*d415bd75Srobert public:
VPExpandSCEVRecipe(const SCEV * Expr,ScalarEvolution & SE)1734*d415bd75Srobert   VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
1735*d415bd75Srobert       : VPRecipeBase(VPDef::VPExpandSCEVSC, {}), VPValue(this), Expr(Expr),
1736*d415bd75Srobert         SE(SE) {}
1737*d415bd75Srobert 
1738*d415bd75Srobert   ~VPExpandSCEVRecipe() override = default;
1739*d415bd75Srobert 
1740*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
1741*d415bd75Srobert 
1742*d415bd75Srobert   /// Generate a canonical vector induction variable of the vector loop, with
1743*d415bd75Srobert   void execute(VPTransformState &State) override;
1744*d415bd75Srobert 
1745*d415bd75Srobert #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1746*d415bd75Srobert   /// Print the recipe.
1747*d415bd75Srobert   void print(raw_ostream &O, const Twine &Indent,
1748*d415bd75Srobert              VPSlotTracker &SlotTracker) const override;
1749*d415bd75Srobert #endif
1750*d415bd75Srobert 
getSCEV()1751*d415bd75Srobert   const SCEV *getSCEV() const { return Expr; }
1752*d415bd75Srobert };
1753*d415bd75Srobert 
1754*d415bd75Srobert /// Canonical scalar induction phi of the vector loop. Starting at the specified
1755*d415bd75Srobert /// start value (either 0 or the resume value when vectorizing the epilogue
1756*d415bd75Srobert /// loop). VPWidenCanonicalIVRecipe represents the vector version of the
1757*d415bd75Srobert /// canonical induction variable.
1758*d415bd75Srobert class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
1759*d415bd75Srobert   DebugLoc DL;
1760*d415bd75Srobert 
1761*d415bd75Srobert public:
VPCanonicalIVPHIRecipe(VPValue * StartV,DebugLoc DL)1762*d415bd75Srobert   VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
1763*d415bd75Srobert       : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV), DL(DL) {}
1764*d415bd75Srobert 
1765*d415bd75Srobert   ~VPCanonicalIVPHIRecipe() override = default;
1766*d415bd75Srobert 
VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)1767*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
1768*d415bd75Srobert 
1769*d415bd75Srobert   static inline bool classof(const VPHeaderPHIRecipe *D) {
1770*d415bd75Srobert     return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
1771*d415bd75Srobert   }
1772*d415bd75Srobert 
1773*d415bd75Srobert   /// Generate the canonical scalar induction phi of the vector loop.
1774*d415bd75Srobert   void execute(VPTransformState &State) override;
1775*d415bd75Srobert 
1776*d415bd75Srobert #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1777*d415bd75Srobert   /// Print the recipe.
1778*d415bd75Srobert   void print(raw_ostream &O, const Twine &Indent,
1779*d415bd75Srobert              VPSlotTracker &SlotTracker) const override;
1780*d415bd75Srobert #endif
1781*d415bd75Srobert 
1782*d415bd75Srobert   /// Returns the scalar type of the induction.
getScalarType()1783*d415bd75Srobert   const Type *getScalarType() const {
1784*d415bd75Srobert     return getOperand(0)->getLiveInIRValue()->getType();
1785*d415bd75Srobert   }
1786*d415bd75Srobert 
1787*d415bd75Srobert   /// Returns true if the recipe only uses the first lane of operand \p Op.
onlyFirstLaneUsed(const VPValue * Op)1788*d415bd75Srobert   bool onlyFirstLaneUsed(const VPValue *Op) const override {
1789*d415bd75Srobert     assert(is_contained(operands(), Op) &&
1790*d415bd75Srobert            "Op must be an operand of the recipe");
1791*d415bd75Srobert     return true;
1792*d415bd75Srobert   }
1793*d415bd75Srobert 
1794*d415bd75Srobert   /// Check if the induction described by \p ID is canonical, i.e.  has the same
1795*d415bd75Srobert   /// start, step (of 1), and type as the canonical IV.
1796*d415bd75Srobert   bool isCanonical(const InductionDescriptor &ID, Type *Ty) const;
1797*d415bd75Srobert };
1798*d415bd75Srobert 
1799*d415bd75Srobert /// A recipe for generating the active lane mask for the vector loop that is
1800*d415bd75Srobert /// used to predicate the vector operations.
1801*d415bd75Srobert /// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
1802*d415bd75Srobert /// remove VPActiveLaneMaskPHIRecipe.
1803*d415bd75Srobert class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
1804*d415bd75Srobert   DebugLoc DL;
1805*d415bd75Srobert 
1806*d415bd75Srobert public:
VPActiveLaneMaskPHIRecipe(VPValue * StartMask,DebugLoc DL)1807*d415bd75Srobert   VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
1808*d415bd75Srobert       : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask),
1809*d415bd75Srobert         DL(DL) {}
1810*d415bd75Srobert 
1811*d415bd75Srobert   ~VPActiveLaneMaskPHIRecipe() override = default;
1812*d415bd75Srobert 
VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)1813*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
1814*d415bd75Srobert 
1815*d415bd75Srobert   static inline bool classof(const VPHeaderPHIRecipe *D) {
1816*d415bd75Srobert     return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
1817*d415bd75Srobert   }
1818*d415bd75Srobert 
1819*d415bd75Srobert   /// Generate the active lane mask phi of the vector loop.
1820*d415bd75Srobert   void execute(VPTransformState &State) override;
1821*d415bd75Srobert 
1822*d415bd75Srobert #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1823*d415bd75Srobert   /// Print the recipe.
1824*d415bd75Srobert   void print(raw_ostream &O, const Twine &Indent,
1825*d415bd75Srobert              VPSlotTracker &SlotTracker) const override;
1826*d415bd75Srobert #endif
1827097a140dSpatrick };
1828097a140dSpatrick 
1829097a140dSpatrick /// A Recipe for widening the canonical induction variable of the vector loop.
1830*d415bd75Srobert class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
1831097a140dSpatrick public:
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe * CanonicalIV)1832*d415bd75Srobert   VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
1833*d415bd75Srobert       : VPRecipeBase(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}),
1834*d415bd75Srobert         VPValue(this) {}
183573471bf0Spatrick 
1836097a140dSpatrick   ~VPWidenCanonicalIVRecipe() override = default;
1837097a140dSpatrick 
1838*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
1839097a140dSpatrick 
1840097a140dSpatrick   /// Generate a canonical vector induction variable of the vector loop, with
1841097a140dSpatrick   /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
1842097a140dSpatrick   /// step = <VF*UF, VF*UF, ..., VF*UF>.
1843097a140dSpatrick   void execute(VPTransformState &State) override;
1844097a140dSpatrick 
184573471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1846097a140dSpatrick   /// Print the recipe.
1847097a140dSpatrick   void print(raw_ostream &O, const Twine &Indent,
1848097a140dSpatrick              VPSlotTracker &SlotTracker) const override;
184973471bf0Spatrick #endif
1850*d415bd75Srobert 
1851*d415bd75Srobert   /// Returns the scalar type of the induction.
getScalarType()1852*d415bd75Srobert   const Type *getScalarType() const {
1853*d415bd75Srobert     return cast<VPCanonicalIVPHIRecipe>(getOperand(0)->getDefiningRecipe())
1854*d415bd75Srobert         ->getScalarType();
1855*d415bd75Srobert   }
1856*d415bd75Srobert };
1857*d415bd75Srobert 
1858*d415bd75Srobert /// A recipe for converting the canonical IV value to the corresponding value of
1859*d415bd75Srobert /// an IV with different start and step values, using Start + CanonicalIV *
1860*d415bd75Srobert /// Step.
1861*d415bd75Srobert class VPDerivedIVRecipe : public VPRecipeBase, public VPValue {
1862*d415bd75Srobert   /// The type of the result value. It may be smaller than the type of the
1863*d415bd75Srobert   /// induction and in this case it will get truncated to ResultTy.
1864*d415bd75Srobert   Type *ResultTy;
1865*d415bd75Srobert 
1866*d415bd75Srobert   /// Induction descriptor for the induction the canonical IV is transformed to.
1867*d415bd75Srobert   const InductionDescriptor &IndDesc;
1868*d415bd75Srobert 
1869*d415bd75Srobert public:
VPDerivedIVRecipe(const InductionDescriptor & IndDesc,VPValue * Start,VPCanonicalIVPHIRecipe * CanonicalIV,VPValue * Step,Type * ResultTy)1870*d415bd75Srobert   VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
1871*d415bd75Srobert                     VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
1872*d415bd75Srobert                     Type *ResultTy)
1873*d415bd75Srobert       : VPRecipeBase(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
1874*d415bd75Srobert         VPValue(this), ResultTy(ResultTy), IndDesc(IndDesc) {}
1875*d415bd75Srobert 
1876*d415bd75Srobert   ~VPDerivedIVRecipe() override = default;
1877*d415bd75Srobert 
1878*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
1879*d415bd75Srobert 
1880*d415bd75Srobert   /// Generate the transformed value of the induction at offset StartValue (1.
1881*d415bd75Srobert   /// operand) + IV (2. operand) * StepValue (3, operand).
1882*d415bd75Srobert   void execute(VPTransformState &State) override;
1883*d415bd75Srobert 
1884*d415bd75Srobert #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1885*d415bd75Srobert   /// Print the recipe.
1886*d415bd75Srobert   void print(raw_ostream &O, const Twine &Indent,
1887*d415bd75Srobert              VPSlotTracker &SlotTracker) const override;
1888*d415bd75Srobert #endif
1889*d415bd75Srobert 
getStartValue()1890*d415bd75Srobert   VPValue *getStartValue() const { return getOperand(0); }
getCanonicalIV()1891*d415bd75Srobert   VPValue *getCanonicalIV() const { return getOperand(1); }
getStepValue()1892*d415bd75Srobert   VPValue *getStepValue() const { return getOperand(2); }
1893*d415bd75Srobert 
1894*d415bd75Srobert   /// Returns true if the recipe only uses the first lane of operand \p Op.
onlyFirstLaneUsed(const VPValue * Op)1895*d415bd75Srobert   bool onlyFirstLaneUsed(const VPValue *Op) const override {
1896*d415bd75Srobert     assert(is_contained(operands(), Op) &&
1897*d415bd75Srobert            "Op must be an operand of the recipe");
1898*d415bd75Srobert     return true;
1899*d415bd75Srobert   }
1900*d415bd75Srobert };
1901*d415bd75Srobert 
1902*d415bd75Srobert /// A recipe for handling phi nodes of integer and floating-point inductions,
1903*d415bd75Srobert /// producing their scalar values.
1904*d415bd75Srobert class VPScalarIVStepsRecipe : public VPRecipeBase, public VPValue {
1905*d415bd75Srobert   const InductionDescriptor &IndDesc;
1906*d415bd75Srobert 
1907*d415bd75Srobert public:
VPScalarIVStepsRecipe(const InductionDescriptor & IndDesc,VPValue * IV,VPValue * Step)1908*d415bd75Srobert   VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
1909*d415bd75Srobert                         VPValue *Step)
1910*d415bd75Srobert       : VPRecipeBase(VPDef::VPScalarIVStepsSC, {IV, Step}), VPValue(this),
1911*d415bd75Srobert         IndDesc(IndDesc) {}
1912*d415bd75Srobert 
1913*d415bd75Srobert   ~VPScalarIVStepsRecipe() override = default;
1914*d415bd75Srobert 
1915*d415bd75Srobert   VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
1916*d415bd75Srobert 
1917*d415bd75Srobert   /// Generate the scalarized versions of the phi node as needed by their users.
1918*d415bd75Srobert   void execute(VPTransformState &State) override;
1919*d415bd75Srobert 
1920*d415bd75Srobert #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1921*d415bd75Srobert   /// Print the recipe.
1922*d415bd75Srobert   void print(raw_ostream &O, const Twine &Indent,
1923*d415bd75Srobert              VPSlotTracker &SlotTracker) const override;
1924*d415bd75Srobert #endif
1925*d415bd75Srobert 
getStepValue()1926*d415bd75Srobert   VPValue *getStepValue() const { return getOperand(1); }
1927*d415bd75Srobert 
1928*d415bd75Srobert   /// Returns true if the recipe only uses the first lane of operand \p Op.
onlyFirstLaneUsed(const VPValue * Op)1929*d415bd75Srobert   bool onlyFirstLaneUsed(const VPValue *Op) const override {
1930*d415bd75Srobert     assert(is_contained(operands(), Op) &&
1931*d415bd75Srobert            "Op must be an operand of the recipe");
1932*d415bd75Srobert     return true;
1933*d415bd75Srobert   }
193409467b48Spatrick };
193509467b48Spatrick 
193609467b48Spatrick /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
193709467b48Spatrick /// holds a sequence of zero or more VPRecipe's each representing a sequence of
193873471bf0Spatrick /// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
193909467b48Spatrick class VPBasicBlock : public VPBlockBase {
194009467b48Spatrick public:
194109467b48Spatrick   using RecipeListTy = iplist<VPRecipeBase>;
194209467b48Spatrick 
194309467b48Spatrick private:
194409467b48Spatrick   /// The VPRecipes held in the order of output instructions to generate.
194509467b48Spatrick   RecipeListTy Recipes;
194609467b48Spatrick 
194709467b48Spatrick public:
194809467b48Spatrick   VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
194909467b48Spatrick       : VPBlockBase(VPBasicBlockSC, Name.str()) {
195009467b48Spatrick     if (Recipe)
195109467b48Spatrick       appendRecipe(Recipe);
195209467b48Spatrick   }
195309467b48Spatrick 
~VPBasicBlock()195473471bf0Spatrick   ~VPBasicBlock() override {
195573471bf0Spatrick     while (!Recipes.empty())
195673471bf0Spatrick       Recipes.pop_back();
195773471bf0Spatrick   }
195809467b48Spatrick 
195909467b48Spatrick   /// Instruction iterators...
196009467b48Spatrick   using iterator = RecipeListTy::iterator;
196109467b48Spatrick   using const_iterator = RecipeListTy::const_iterator;
196209467b48Spatrick   using reverse_iterator = RecipeListTy::reverse_iterator;
196309467b48Spatrick   using const_reverse_iterator = RecipeListTy::const_reverse_iterator;
196409467b48Spatrick 
196509467b48Spatrick   //===--------------------------------------------------------------------===//
196609467b48Spatrick   /// Recipe iterator methods
196709467b48Spatrick   ///
begin()196809467b48Spatrick   inline iterator begin() { return Recipes.begin(); }
begin()196909467b48Spatrick   inline const_iterator begin() const { return Recipes.begin(); }
end()197009467b48Spatrick   inline iterator end() { return Recipes.end(); }
end()197109467b48Spatrick   inline const_iterator end() const { return Recipes.end(); }
197209467b48Spatrick 
rbegin()197309467b48Spatrick   inline reverse_iterator rbegin() { return Recipes.rbegin(); }
rbegin()197409467b48Spatrick   inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
rend()197509467b48Spatrick   inline reverse_iterator rend() { return Recipes.rend(); }
rend()197609467b48Spatrick   inline const_reverse_iterator rend() const { return Recipes.rend(); }
197709467b48Spatrick 
size()197809467b48Spatrick   inline size_t size() const { return Recipes.size(); }
empty()197909467b48Spatrick   inline bool empty() const { return Recipes.empty(); }
front()198009467b48Spatrick   inline const VPRecipeBase &front() const { return Recipes.front(); }
front()198109467b48Spatrick   inline VPRecipeBase &front() { return Recipes.front(); }
back()198209467b48Spatrick   inline const VPRecipeBase &back() const { return Recipes.back(); }
back()198309467b48Spatrick   inline VPRecipeBase &back() { return Recipes.back(); }
198409467b48Spatrick 
198509467b48Spatrick   /// Returns a reference to the list of recipes.
getRecipeList()198609467b48Spatrick   RecipeListTy &getRecipeList() { return Recipes; }
198709467b48Spatrick 
198809467b48Spatrick   /// Returns a pointer to a member of the recipe list.
getSublistAccess(VPRecipeBase *)198909467b48Spatrick   static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
199009467b48Spatrick     return &VPBasicBlock::Recipes;
199109467b48Spatrick   }
199209467b48Spatrick 
199309467b48Spatrick   /// Method to support type inquiry through isa, cast, and dyn_cast.
classof(const VPBlockBase * V)199409467b48Spatrick   static inline bool classof(const VPBlockBase *V) {
199509467b48Spatrick     return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
199609467b48Spatrick   }
199709467b48Spatrick 
insert(VPRecipeBase * Recipe,iterator InsertPt)199809467b48Spatrick   void insert(VPRecipeBase *Recipe, iterator InsertPt) {
199909467b48Spatrick     assert(Recipe && "No recipe to append.");
200009467b48Spatrick     assert(!Recipe->Parent && "Recipe already in VPlan");
200109467b48Spatrick     Recipe->Parent = this;
200209467b48Spatrick     Recipes.insert(InsertPt, Recipe);
200309467b48Spatrick   }
200409467b48Spatrick 
200509467b48Spatrick   /// Augment the existing recipes of a VPBasicBlock with an additional
200609467b48Spatrick   /// \p Recipe as the last recipe.
appendRecipe(VPRecipeBase * Recipe)200709467b48Spatrick   void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
200809467b48Spatrick 
200909467b48Spatrick   /// The method which generates the output IR instructions that correspond to
201009467b48Spatrick   /// this VPBasicBlock, thereby "executing" the VPlan.
2011*d415bd75Srobert   void execute(VPTransformState *State) override;
201209467b48Spatrick 
201373471bf0Spatrick   /// Return the position of the first non-phi node recipe in the block.
201473471bf0Spatrick   iterator getFirstNonPhi();
201573471bf0Spatrick 
201673471bf0Spatrick   /// Returns an iterator range over the PHI-like recipes in the block.
phis()201773471bf0Spatrick   iterator_range<iterator> phis() {
201873471bf0Spatrick     return make_range(begin(), getFirstNonPhi());
201973471bf0Spatrick   }
202073471bf0Spatrick 
202173471bf0Spatrick   void dropAllReferences(VPValue *NewValue) override;
202273471bf0Spatrick 
202373471bf0Spatrick   /// Split current block at \p SplitAt by inserting a new block between the
202473471bf0Spatrick   /// current block and its successors and moving all recipes starting at
202573471bf0Spatrick   /// SplitAt to the new block. Returns the new block.
202673471bf0Spatrick   VPBasicBlock *splitAt(iterator SplitAt);
202773471bf0Spatrick 
2028*d415bd75Srobert   VPRegionBlock *getEnclosingLoopRegion();
2029*d415bd75Srobert 
203073471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
203173471bf0Spatrick   /// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p
203273471bf0Spatrick   /// SlotTracker is used to print unnamed VPValue's using consequtive numbers.
203373471bf0Spatrick   ///
203473471bf0Spatrick   /// Note that the numbering is applied to the whole VPlan, so printing
203573471bf0Spatrick   /// individual blocks is consistent with the whole VPlan printing.
203673471bf0Spatrick   void print(raw_ostream &O, const Twine &Indent,
203773471bf0Spatrick              VPSlotTracker &SlotTracker) const override;
203873471bf0Spatrick   using VPBlockBase::print; // Get the print(raw_stream &O) version.
203973471bf0Spatrick #endif
204073471bf0Spatrick 
2041*d415bd75Srobert   /// If the block has multiple successors, return the branch recipe terminating
2042*d415bd75Srobert   /// the block. If there are no or only a single successor, return nullptr;
2043*d415bd75Srobert   VPRecipeBase *getTerminator();
2044*d415bd75Srobert   const VPRecipeBase *getTerminator() const;
2045*d415bd75Srobert 
2046*d415bd75Srobert   /// Returns true if the block is exiting it's parent region.
2047*d415bd75Srobert   bool isExiting() const;
2048*d415bd75Srobert 
204909467b48Spatrick private:
205009467b48Spatrick   /// Create an IR BasicBlock to hold the output instructions generated by this
205109467b48Spatrick   /// VPBasicBlock, and return it. Update the CFGState accordingly.
205209467b48Spatrick   BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
205309467b48Spatrick };
205409467b48Spatrick 
205509467b48Spatrick /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
2056*d415bd75Srobert /// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
205709467b48Spatrick /// A VPRegionBlock may indicate that its contents are to be replicated several
205809467b48Spatrick /// times. This is designed to support predicated scalarization, in which a
205909467b48Spatrick /// scalar if-then code structure needs to be generated VF * UF times. Having
206009467b48Spatrick /// this replication indicator helps to keep a single model for multiple
206109467b48Spatrick /// candidate VF's. The actual replication takes place only once the desired VF
206209467b48Spatrick /// and UF have been determined.
206309467b48Spatrick class VPRegionBlock : public VPBlockBase {
206409467b48Spatrick   /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
206509467b48Spatrick   VPBlockBase *Entry;
206609467b48Spatrick 
2067*d415bd75Srobert   /// Hold the Single Exiting block of the SESE region modelled by the
2068*d415bd75Srobert   /// VPRegionBlock.
2069*d415bd75Srobert   VPBlockBase *Exiting;
207009467b48Spatrick 
207109467b48Spatrick   /// An indicator whether this region is to generate multiple replicated
207209467b48Spatrick   /// instances of output IR corresponding to its VPBlockBases.
207309467b48Spatrick   bool IsReplicator;
207409467b48Spatrick 
207509467b48Spatrick public:
2076*d415bd75Srobert   VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
207709467b48Spatrick                 const std::string &Name = "", bool IsReplicator = false)
VPBlockBase(VPRegionBlockSC,Name)2078*d415bd75Srobert       : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
207909467b48Spatrick         IsReplicator(IsReplicator) {
208009467b48Spatrick     assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
2081*d415bd75Srobert     assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
208209467b48Spatrick     Entry->setParent(this);
2083*d415bd75Srobert     Exiting->setParent(this);
208409467b48Spatrick   }
208509467b48Spatrick   VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
VPBlockBase(VPRegionBlockSC,Name)2086*d415bd75Srobert       : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
208709467b48Spatrick         IsReplicator(IsReplicator) {}
208809467b48Spatrick 
~VPRegionBlock()208909467b48Spatrick   ~VPRegionBlock() override {
209073471bf0Spatrick     if (Entry) {
209173471bf0Spatrick       VPValue DummyValue;
209273471bf0Spatrick       Entry->dropAllReferences(&DummyValue);
209309467b48Spatrick       deleteCFG(Entry);
209409467b48Spatrick     }
209573471bf0Spatrick   }
209609467b48Spatrick 
209709467b48Spatrick   /// Method to support type inquiry through isa, cast, and dyn_cast.
classof(const VPBlockBase * V)209809467b48Spatrick   static inline bool classof(const VPBlockBase *V) {
209909467b48Spatrick     return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
210009467b48Spatrick   }
210109467b48Spatrick 
getEntry()210209467b48Spatrick   const VPBlockBase *getEntry() const { return Entry; }
getEntry()210309467b48Spatrick   VPBlockBase *getEntry() { return Entry; }
210409467b48Spatrick 
210509467b48Spatrick   /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
210609467b48Spatrick   /// EntryBlock must have no predecessors.
setEntry(VPBlockBase * EntryBlock)210709467b48Spatrick   void setEntry(VPBlockBase *EntryBlock) {
210809467b48Spatrick     assert(EntryBlock->getPredecessors().empty() &&
210909467b48Spatrick            "Entry block cannot have predecessors.");
211009467b48Spatrick     Entry = EntryBlock;
211109467b48Spatrick     EntryBlock->setParent(this);
211209467b48Spatrick   }
211309467b48Spatrick 
getExiting()2114*d415bd75Srobert   const VPBlockBase *getExiting() const { return Exiting; }
getExiting()2115*d415bd75Srobert   VPBlockBase *getExiting() { return Exiting; }
211609467b48Spatrick 
2117*d415bd75Srobert   /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
2118*d415bd75Srobert   /// ExitingBlock must have no successors.
setExiting(VPBlockBase * ExitingBlock)2119*d415bd75Srobert   void setExiting(VPBlockBase *ExitingBlock) {
2120*d415bd75Srobert     assert(ExitingBlock->getSuccessors().empty() &&
212109467b48Spatrick            "Exit block cannot have successors.");
2122*d415bd75Srobert     Exiting = ExitingBlock;
2123*d415bd75Srobert     ExitingBlock->setParent(this);
2124*d415bd75Srobert   }
2125*d415bd75Srobert 
2126*d415bd75Srobert   /// Returns the pre-header VPBasicBlock of the loop region.
getPreheaderVPBB()2127*d415bd75Srobert   VPBasicBlock *getPreheaderVPBB() {
2128*d415bd75Srobert     assert(!isReplicator() && "should only get pre-header of loop regions");
2129*d415bd75Srobert     return getSinglePredecessor()->getExitingBasicBlock();
213009467b48Spatrick   }
213109467b48Spatrick 
213209467b48Spatrick   /// An indicator whether this region is to generate multiple replicated
213309467b48Spatrick   /// instances of output IR corresponding to its VPBlockBases.
isReplicator()213409467b48Spatrick   bool isReplicator() const { return IsReplicator; }
213509467b48Spatrick 
213609467b48Spatrick   /// The method which generates the output IR instructions that correspond to
213709467b48Spatrick   /// this VPRegionBlock, thereby "executing" the VPlan.
2138*d415bd75Srobert   void execute(VPTransformState *State) override;
213973471bf0Spatrick 
214073471bf0Spatrick   void dropAllReferences(VPValue *NewValue) override;
214173471bf0Spatrick 
214273471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
214373471bf0Spatrick   /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
214473471bf0Spatrick   /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
214573471bf0Spatrick   /// consequtive numbers.
214673471bf0Spatrick   ///
214773471bf0Spatrick   /// Note that the numbering is applied to the whole VPlan, so printing
214873471bf0Spatrick   /// individual regions is consistent with the whole VPlan printing.
214973471bf0Spatrick   void print(raw_ostream &O, const Twine &Indent,
215073471bf0Spatrick              VPSlotTracker &SlotTracker) const override;
215173471bf0Spatrick   using VPBlockBase::print; // Get the print(raw_stream &O) version.
215273471bf0Spatrick #endif
215309467b48Spatrick };
215409467b48Spatrick 
215509467b48Spatrick /// VPlan models a candidate for vectorization, encoding various decisions take
215609467b48Spatrick /// to produce efficient output IR, including which branches, basic-blocks and
215709467b48Spatrick /// output IR instructions to generate, and their cost. VPlan holds a
215809467b48Spatrick /// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
215909467b48Spatrick /// VPBlock.
216009467b48Spatrick class VPlan {
216109467b48Spatrick   friend class VPlanPrinter;
2162097a140dSpatrick   friend class VPSlotTracker;
216309467b48Spatrick 
216409467b48Spatrick   /// Hold the single entry to the Hierarchical CFG of the VPlan.
216509467b48Spatrick   VPBlockBase *Entry;
216609467b48Spatrick 
216709467b48Spatrick   /// Holds the VFs applicable to this VPlan.
216873471bf0Spatrick   SmallSetVector<ElementCount, 2> VFs;
216909467b48Spatrick 
2170*d415bd75Srobert   /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
2171*d415bd75Srobert   /// any UF.
2172*d415bd75Srobert   SmallSetVector<unsigned, 2> UFs;
2173*d415bd75Srobert 
217409467b48Spatrick   /// Holds the name of the VPlan, for printing.
217509467b48Spatrick   std::string Name;
217609467b48Spatrick 
2177*d415bd75Srobert   /// Holds all the external definitions created for this VPlan. External
2178*d415bd75Srobert   /// definitions must be immutable and hold a pointer to their underlying IR.
2179*d415bd75Srobert   DenseMap<Value *, VPValue *> VPExternalDefs;
2180*d415bd75Srobert 
2181*d415bd75Srobert   /// Represents the trip count of the original loop, for folding
2182*d415bd75Srobert   /// the tail.
2183*d415bd75Srobert   VPValue *TripCount = nullptr;
218409467b48Spatrick 
218509467b48Spatrick   /// Represents the backedge taken count of the original loop, for folding
2186*d415bd75Srobert   /// the tail. It equals TripCount - 1.
218709467b48Spatrick   VPValue *BackedgeTakenCount = nullptr;
218809467b48Spatrick 
2189*d415bd75Srobert   /// Represents the vector trip count.
2190*d415bd75Srobert   VPValue VectorTripCount;
2191*d415bd75Srobert 
219209467b48Spatrick   /// Holds a mapping between Values and their corresponding VPValue inside
219309467b48Spatrick   /// VPlan.
219409467b48Spatrick   Value2VPValueTy Value2VPValue;
219509467b48Spatrick 
219673471bf0Spatrick   /// Contains all VPValues that been allocated by addVPValue directly and need
219773471bf0Spatrick   /// to be free when the plan's destructor is called.
219873471bf0Spatrick   SmallVector<VPValue *, 16> VPValuesToFree;
219973471bf0Spatrick 
2200*d415bd75Srobert   /// Indicates whether it is safe use the Value2VPValue mapping or if the
2201*d415bd75Srobert   /// mapping cannot be used any longer, because it is stale.
2202*d415bd75Srobert   bool Value2VPValueEnabled = true;
2203*d415bd75Srobert 
2204*d415bd75Srobert   /// Values used outside the plan.
2205*d415bd75Srobert   MapVector<PHINode *, VPLiveOut *> LiveOuts;
220609467b48Spatrick 
220709467b48Spatrick public:
Entry(Entry)2208097a140dSpatrick   VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {
2209097a140dSpatrick     if (Entry)
2210097a140dSpatrick       Entry->setPlan(this);
2211097a140dSpatrick   }
221209467b48Spatrick 
2213*d415bd75Srobert   ~VPlan();
221473471bf0Spatrick 
2215*d415bd75Srobert   /// Prepare the plan for execution, setting up the required live-in values.
2216*d415bd75Srobert   void prepareToExecute(Value *TripCount, Value *VectorTripCount,
2217*d415bd75Srobert                         Value *CanonicalIVStartValue, VPTransformState &State,
2218*d415bd75Srobert                         bool IsEpilogueVectorization);
221909467b48Spatrick 
222009467b48Spatrick   /// Generate the IR code for this VPlan.
2221*d415bd75Srobert   void execute(VPTransformState *State);
222209467b48Spatrick 
getEntry()222309467b48Spatrick   VPBlockBase *getEntry() { return Entry; }
getEntry()222409467b48Spatrick   const VPBlockBase *getEntry() const { return Entry; }
222509467b48Spatrick 
setEntry(VPBlockBase * Block)2226097a140dSpatrick   VPBlockBase *setEntry(VPBlockBase *Block) {
2227097a140dSpatrick     Entry = Block;
2228097a140dSpatrick     Block->setPlan(this);
2229097a140dSpatrick     return Entry;
2230097a140dSpatrick   }
223109467b48Spatrick 
2232*d415bd75Srobert   /// The trip count of the original loop.
getOrCreateTripCount()2233*d415bd75Srobert   VPValue *getOrCreateTripCount() {
2234*d415bd75Srobert     if (!TripCount)
2235*d415bd75Srobert       TripCount = new VPValue();
2236*d415bd75Srobert     return TripCount;
2237*d415bd75Srobert   }
2238*d415bd75Srobert 
223909467b48Spatrick   /// The backedge taken count of the original loop.
getOrCreateBackedgeTakenCount()224009467b48Spatrick   VPValue *getOrCreateBackedgeTakenCount() {
224109467b48Spatrick     if (!BackedgeTakenCount)
224209467b48Spatrick       BackedgeTakenCount = new VPValue();
224309467b48Spatrick     return BackedgeTakenCount;
224409467b48Spatrick   }
224509467b48Spatrick 
2246*d415bd75Srobert   /// The vector trip count.
getVectorTripCount()2247*d415bd75Srobert   VPValue &getVectorTripCount() { return VectorTripCount; }
2248*d415bd75Srobert 
2249*d415bd75Srobert   /// Mark the plan to indicate that using Value2VPValue is not safe any
2250*d415bd75Srobert   /// longer, because it may be stale.
disableValue2VPValue()2251*d415bd75Srobert   void disableValue2VPValue() { Value2VPValueEnabled = false; }
2252*d415bd75Srobert 
addVF(ElementCount VF)225373471bf0Spatrick   void addVF(ElementCount VF) { VFs.insert(VF); }
225409467b48Spatrick 
setVF(ElementCount VF)2255*d415bd75Srobert   void setVF(ElementCount VF) {
2256*d415bd75Srobert     assert(hasVF(VF) && "Cannot set VF not already in plan");
2257*d415bd75Srobert     VFs.clear();
2258*d415bd75Srobert     VFs.insert(VF);
2259*d415bd75Srobert   }
2260*d415bd75Srobert 
hasVF(ElementCount VF)226173471bf0Spatrick   bool hasVF(ElementCount VF) { return VFs.count(VF); }
226209467b48Spatrick 
hasScalarVFOnly()2263*d415bd75Srobert   bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
2264*d415bd75Srobert 
hasUF(unsigned UF)2265*d415bd75Srobert   bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
2266*d415bd75Srobert 
setUF(unsigned UF)2267*d415bd75Srobert   void setUF(unsigned UF) {
2268*d415bd75Srobert     assert(hasUF(UF) && "Cannot set the UF not already in plan");
2269*d415bd75Srobert     UFs.clear();
2270*d415bd75Srobert     UFs.insert(UF);
2271*d415bd75Srobert   }
2272*d415bd75Srobert 
2273*d415bd75Srobert   /// Return a string with the name of the plan and the applicable VFs and UFs.
2274*d415bd75Srobert   std::string getName() const;
227509467b48Spatrick 
setName(const Twine & newName)227609467b48Spatrick   void setName(const Twine &newName) { Name = newName.str(); }
227709467b48Spatrick 
2278*d415bd75Srobert   /// Get the existing or add a new external definition for \p V.
getOrAddExternalDef(Value * V)2279*d415bd75Srobert   VPValue *getOrAddExternalDef(Value *V) {
2280*d415bd75Srobert     auto I = VPExternalDefs.insert({V, nullptr});
2281*d415bd75Srobert     if (I.second)
2282*d415bd75Srobert       I.first->second = new VPValue(V);
2283*d415bd75Srobert     return I.first->second;
2284*d415bd75Srobert   }
228509467b48Spatrick 
addVPValue(Value * V)228609467b48Spatrick   void addVPValue(Value *V) {
2287*d415bd75Srobert     assert(Value2VPValueEnabled &&
2288*d415bd75Srobert            "IR value to VPValue mapping may be out of date!");
228909467b48Spatrick     assert(V && "Trying to add a null Value to VPlan");
229009467b48Spatrick     assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
229173471bf0Spatrick     VPValue *VPV = new VPValue(V);
229273471bf0Spatrick     Value2VPValue[V] = VPV;
229373471bf0Spatrick     VPValuesToFree.push_back(VPV);
229473471bf0Spatrick   }
229573471bf0Spatrick 
addVPValue(Value * V,VPValue * VPV)229673471bf0Spatrick   void addVPValue(Value *V, VPValue *VPV) {
2297*d415bd75Srobert     assert(Value2VPValueEnabled && "Value2VPValue mapping may be out of date!");
229873471bf0Spatrick     assert(V && "Trying to add a null Value to VPlan");
229973471bf0Spatrick     assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
230073471bf0Spatrick     Value2VPValue[V] = VPV;
230109467b48Spatrick   }
230209467b48Spatrick 
2303*d415bd75Srobert   /// Returns the VPValue for \p V. \p OverrideAllowed can be used to disable
2304*d415bd75Srobert   /// checking whether it is safe to query VPValues using IR Values.
2305*d415bd75Srobert   VPValue *getVPValue(Value *V, bool OverrideAllowed = false) {
2306*d415bd75Srobert     assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) &&
2307*d415bd75Srobert            "Value2VPValue mapping may be out of date!");
230809467b48Spatrick     assert(V && "Trying to get the VPValue of a null Value");
230909467b48Spatrick     assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
231009467b48Spatrick     return Value2VPValue[V];
231109467b48Spatrick   }
231209467b48Spatrick 
2313*d415bd75Srobert   /// Gets the VPValue or adds a new one (if none exists yet) for \p V. \p
2314*d415bd75Srobert   /// OverrideAllowed can be used to disable checking whether it is safe to
2315*d415bd75Srobert   /// query VPValues using IR Values.
2316*d415bd75Srobert   VPValue *getOrAddVPValue(Value *V, bool OverrideAllowed = false) {
2317*d415bd75Srobert     assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) &&
2318*d415bd75Srobert            "Value2VPValue mapping may be out of date!");
231909467b48Spatrick     assert(V && "Trying to get or add the VPValue of a null Value");
232009467b48Spatrick     if (!Value2VPValue.count(V))
232109467b48Spatrick       addVPValue(V);
232209467b48Spatrick     return getVPValue(V);
232309467b48Spatrick   }
232409467b48Spatrick 
removeVPValueFor(Value * V)2325*d415bd75Srobert   void removeVPValueFor(Value *V) {
2326*d415bd75Srobert     assert(Value2VPValueEnabled &&
2327*d415bd75Srobert            "IR value to VPValue mapping may be out of date!");
2328*d415bd75Srobert     Value2VPValue.erase(V);
2329*d415bd75Srobert   }
233009467b48Spatrick 
233173471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
233273471bf0Spatrick   /// Print this VPlan to \p O.
233373471bf0Spatrick   void print(raw_ostream &O) const;
233473471bf0Spatrick 
233573471bf0Spatrick   /// Print this VPlan in DOT format to \p O.
233673471bf0Spatrick   void printDOT(raw_ostream &O) const;
233773471bf0Spatrick 
233809467b48Spatrick   /// Dump the plan to stderr (for debugging).
233973471bf0Spatrick   LLVM_DUMP_METHOD void dump() const;
234073471bf0Spatrick #endif
234109467b48Spatrick 
2342097a140dSpatrick   /// Returns a range mapping the values the range \p Operands to their
2343097a140dSpatrick   /// corresponding VPValues.
2344097a140dSpatrick   iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>>
mapToVPValues(User::op_range Operands)2345097a140dSpatrick   mapToVPValues(User::op_range Operands) {
2346097a140dSpatrick     std::function<VPValue *(Value *)> Fn = [this](Value *Op) {
2347097a140dSpatrick       return getOrAddVPValue(Op);
2348097a140dSpatrick     };
2349097a140dSpatrick     return map_range(Operands, Fn);
2350097a140dSpatrick   }
2351097a140dSpatrick 
2352*d415bd75Srobert   /// Returns the VPRegionBlock of the vector loop.
getVectorLoopRegion()2353*d415bd75Srobert   VPRegionBlock *getVectorLoopRegion() {
2354*d415bd75Srobert     return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
2355*d415bd75Srobert   }
getVectorLoopRegion()2356*d415bd75Srobert   const VPRegionBlock *getVectorLoopRegion() const {
2357*d415bd75Srobert     return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
2358*d415bd75Srobert   }
2359*d415bd75Srobert 
2360*d415bd75Srobert   /// Returns the canonical induction recipe of the vector loop.
getCanonicalIV()2361*d415bd75Srobert   VPCanonicalIVPHIRecipe *getCanonicalIV() {
2362*d415bd75Srobert     VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
2363*d415bd75Srobert     if (EntryVPBB->empty()) {
2364*d415bd75Srobert       // VPlan native path.
2365*d415bd75Srobert       EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
2366*d415bd75Srobert     }
2367*d415bd75Srobert     return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
2368*d415bd75Srobert   }
2369*d415bd75Srobert 
2370*d415bd75Srobert   /// Find and return the VPActiveLaneMaskPHIRecipe from the header - there
2371*d415bd75Srobert   /// be only one at most. If there isn't one, then return nullptr.
2372*d415bd75Srobert   VPActiveLaneMaskPHIRecipe *getActiveLaneMaskPhi();
2373*d415bd75Srobert 
2374*d415bd75Srobert   void addLiveOut(PHINode *PN, VPValue *V);
2375*d415bd75Srobert 
clearLiveOuts()2376*d415bd75Srobert   void clearLiveOuts() {
2377*d415bd75Srobert     for (auto &KV : LiveOuts)
2378*d415bd75Srobert       delete KV.second;
2379*d415bd75Srobert     LiveOuts.clear();
2380*d415bd75Srobert   }
2381*d415bd75Srobert 
removeLiveOut(PHINode * PN)2382*d415bd75Srobert   void removeLiveOut(PHINode *PN) {
2383*d415bd75Srobert     delete LiveOuts[PN];
2384*d415bd75Srobert     LiveOuts.erase(PN);
2385*d415bd75Srobert   }
2386*d415bd75Srobert 
getLiveOuts()2387*d415bd75Srobert   const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
2388*d415bd75Srobert     return LiveOuts;
2389*d415bd75Srobert   }
2390*d415bd75Srobert 
239109467b48Spatrick private:
239209467b48Spatrick   /// Add to the given dominator tree the header block and every new basic block
239309467b48Spatrick   /// that was created between it and the latch block, inclusive.
239409467b48Spatrick   static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB,
239509467b48Spatrick                                   BasicBlock *LoopPreHeaderBB,
239609467b48Spatrick                                   BasicBlock *LoopExitBB);
239709467b48Spatrick };
239809467b48Spatrick 
239973471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
240009467b48Spatrick /// VPlanPrinter prints a given VPlan to a given output stream. The printing is
240109467b48Spatrick /// indented and follows the dot format.
240209467b48Spatrick class VPlanPrinter {
240309467b48Spatrick   raw_ostream &OS;
240409467b48Spatrick   const VPlan &Plan;
240509467b48Spatrick   unsigned Depth = 0;
240609467b48Spatrick   unsigned TabWidth = 2;
240709467b48Spatrick   std::string Indent;
240809467b48Spatrick   unsigned BID = 0;
240909467b48Spatrick   SmallDenseMap<const VPBlockBase *, unsigned> BlockID;
241009467b48Spatrick 
2411097a140dSpatrick   VPSlotTracker SlotTracker;
2412097a140dSpatrick 
241309467b48Spatrick   /// Handle indentation.
bumpIndent(int b)241409467b48Spatrick   void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
241509467b48Spatrick 
241609467b48Spatrick   /// Print a given \p Block of the Plan.
241709467b48Spatrick   void dumpBlock(const VPBlockBase *Block);
241809467b48Spatrick 
241909467b48Spatrick   /// Print the information related to the CFG edges going out of a given
242009467b48Spatrick   /// \p Block, followed by printing the successor blocks themselves.
242109467b48Spatrick   void dumpEdges(const VPBlockBase *Block);
242209467b48Spatrick 
242309467b48Spatrick   /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
242409467b48Spatrick   /// its successor blocks.
242509467b48Spatrick   void dumpBasicBlock(const VPBasicBlock *BasicBlock);
242609467b48Spatrick 
242709467b48Spatrick   /// Print a given \p Region of the Plan.
242809467b48Spatrick   void dumpRegion(const VPRegionBlock *Region);
242909467b48Spatrick 
getOrCreateBID(const VPBlockBase * Block)243009467b48Spatrick   unsigned getOrCreateBID(const VPBlockBase *Block) {
243109467b48Spatrick     return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
243209467b48Spatrick   }
243309467b48Spatrick 
2434*d415bd75Srobert   Twine getOrCreateName(const VPBlockBase *Block);
243509467b48Spatrick 
2436*d415bd75Srobert   Twine getUID(const VPBlockBase *Block);
243709467b48Spatrick 
243809467b48Spatrick   /// Print the information related to a CFG edge between two VPBlockBases.
243909467b48Spatrick   void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
244009467b48Spatrick                 const Twine &Label);
244109467b48Spatrick 
244273471bf0Spatrick public:
VPlanPrinter(raw_ostream & O,const VPlan & P)244373471bf0Spatrick   VPlanPrinter(raw_ostream &O, const VPlan &P)
244473471bf0Spatrick       : OS(O), Plan(P), SlotTracker(&P) {}
244509467b48Spatrick 
244673471bf0Spatrick   LLVM_DUMP_METHOD void dump();
244709467b48Spatrick };
244809467b48Spatrick 
244909467b48Spatrick struct VPlanIngredient {
245073471bf0Spatrick   const Value *V;
245109467b48Spatrick 
VPlanIngredientVPlanIngredient245273471bf0Spatrick   VPlanIngredient(const Value *V) : V(V) {}
245373471bf0Spatrick 
245473471bf0Spatrick   void print(raw_ostream &O) const;
245509467b48Spatrick };
245609467b48Spatrick 
245709467b48Spatrick inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
245873471bf0Spatrick   I.print(OS);
245909467b48Spatrick   return OS;
246009467b48Spatrick }
246109467b48Spatrick 
246209467b48Spatrick inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
246373471bf0Spatrick   Plan.print(OS);
246409467b48Spatrick   return OS;
246509467b48Spatrick }
246673471bf0Spatrick #endif
246709467b48Spatrick 
246809467b48Spatrick //===----------------------------------------------------------------------===//
246909467b48Spatrick // VPlan Utilities
247009467b48Spatrick //===----------------------------------------------------------------------===//
247109467b48Spatrick 
247209467b48Spatrick /// Class that provides utilities for VPBlockBases in VPlan.
247309467b48Spatrick class VPBlockUtils {
247409467b48Spatrick public:
247509467b48Spatrick   VPBlockUtils() = delete;
247609467b48Spatrick 
247709467b48Spatrick   /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
247809467b48Spatrick   /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
2479*d415bd75Srobert   /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
2480*d415bd75Srobert   /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
2481*d415bd75Srobert   /// have neither successors nor predecessors.
insertBlockAfter(VPBlockBase * NewBlock,VPBlockBase * BlockPtr)248209467b48Spatrick   static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
248309467b48Spatrick     assert(NewBlock->getSuccessors().empty() &&
2484*d415bd75Srobert            NewBlock->getPredecessors().empty() &&
2485*d415bd75Srobert            "Can't insert new block with predecessors or successors.");
248609467b48Spatrick     NewBlock->setParent(BlockPtr->getParent());
2487*d415bd75Srobert     SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
2488*d415bd75Srobert     for (VPBlockBase *Succ : Succs) {
2489*d415bd75Srobert       disconnectBlocks(BlockPtr, Succ);
2490*d415bd75Srobert       connectBlocks(NewBlock, Succ);
2491*d415bd75Srobert     }
2492*d415bd75Srobert     connectBlocks(BlockPtr, NewBlock);
249309467b48Spatrick   }
249409467b48Spatrick 
249509467b48Spatrick   /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
249609467b48Spatrick   /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p
249709467b48Spatrick   /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
2498*d415bd75Srobert   /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
2499*d415bd75Srobert   /// and \p IfTrue and \p IfFalse must have neither successors nor
2500*d415bd75Srobert   /// predecessors.
insertTwoBlocksAfter(VPBlockBase * IfTrue,VPBlockBase * IfFalse,VPBlockBase * BlockPtr)250109467b48Spatrick   static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
2502*d415bd75Srobert                                    VPBlockBase *BlockPtr) {
250309467b48Spatrick     assert(IfTrue->getSuccessors().empty() &&
250409467b48Spatrick            "Can't insert IfTrue with successors.");
250509467b48Spatrick     assert(IfFalse->getSuccessors().empty() &&
250609467b48Spatrick            "Can't insert IfFalse with successors.");
2507*d415bd75Srobert     BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
250809467b48Spatrick     IfTrue->setPredecessors({BlockPtr});
250909467b48Spatrick     IfFalse->setPredecessors({BlockPtr});
251009467b48Spatrick     IfTrue->setParent(BlockPtr->getParent());
251109467b48Spatrick     IfFalse->setParent(BlockPtr->getParent());
251209467b48Spatrick   }
251309467b48Spatrick 
251409467b48Spatrick   /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
251509467b48Spatrick   /// the successors of \p From and \p From to the predecessors of \p To. Both
251609467b48Spatrick   /// VPBlockBases must have the same parent, which can be null. Both
251709467b48Spatrick   /// VPBlockBases can be already connected to other VPBlockBases.
connectBlocks(VPBlockBase * From,VPBlockBase * To)251809467b48Spatrick   static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {
251909467b48Spatrick     assert((From->getParent() == To->getParent()) &&
252009467b48Spatrick            "Can't connect two block with different parents");
252109467b48Spatrick     assert(From->getNumSuccessors() < 2 &&
252209467b48Spatrick            "Blocks can't have more than two successors.");
252309467b48Spatrick     From->appendSuccessor(To);
252409467b48Spatrick     To->appendPredecessor(From);
252509467b48Spatrick   }
252609467b48Spatrick 
252709467b48Spatrick   /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
252809467b48Spatrick   /// from the successors of \p From and \p From from the predecessors of \p To.
disconnectBlocks(VPBlockBase * From,VPBlockBase * To)252909467b48Spatrick   static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
253009467b48Spatrick     assert(To && "Successor to disconnect is null.");
253109467b48Spatrick     From->removeSuccessor(To);
253209467b48Spatrick     To->removePredecessor(From);
253309467b48Spatrick   }
253409467b48Spatrick 
253573471bf0Spatrick   /// Return an iterator range over \p Range which only includes \p BlockTy
253673471bf0Spatrick   /// blocks. The accesses are casted to \p BlockTy.
253773471bf0Spatrick   template <typename BlockTy, typename T>
blocksOnly(const T & Range)253873471bf0Spatrick   static auto blocksOnly(const T &Range) {
253973471bf0Spatrick     // Create BaseTy with correct const-ness based on BlockTy.
2540*d415bd75Srobert     using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
2541*d415bd75Srobert                                       const VPBlockBase, VPBlockBase>;
254273471bf0Spatrick 
254373471bf0Spatrick     // We need to first create an iterator range over (const) BlocktTy & instead
254473471bf0Spatrick     // of (const) BlockTy * for filter_range to work properly.
254573471bf0Spatrick     auto Mapped =
254673471bf0Spatrick         map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
254773471bf0Spatrick     auto Filter = make_filter_range(
254873471bf0Spatrick         Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
254973471bf0Spatrick     return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
255073471bf0Spatrick       return cast<BlockTy>(&Block);
255173471bf0Spatrick     });
255273471bf0Spatrick   }
255309467b48Spatrick };
255409467b48Spatrick 
255509467b48Spatrick class VPInterleavedAccessInfo {
255609467b48Spatrick   DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
255709467b48Spatrick       InterleaveGroupMap;
255809467b48Spatrick 
255909467b48Spatrick   /// Type for mapping of instruction based interleave groups to VPInstruction
256009467b48Spatrick   /// interleave groups
256109467b48Spatrick   using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
256209467b48Spatrick                              InterleaveGroup<VPInstruction> *>;
256309467b48Spatrick 
256409467b48Spatrick   /// Recursively \p Region and populate VPlan based interleave groups based on
256509467b48Spatrick   /// \p IAI.
256609467b48Spatrick   void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
256709467b48Spatrick                    InterleavedAccessInfo &IAI);
256809467b48Spatrick   /// Recursively traverse \p Block and populate VPlan based interleave groups
256909467b48Spatrick   /// based on \p IAI.
257009467b48Spatrick   void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
257109467b48Spatrick                   InterleavedAccessInfo &IAI);
257209467b48Spatrick 
257309467b48Spatrick public:
257409467b48Spatrick   VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);
257509467b48Spatrick 
~VPInterleavedAccessInfo()257609467b48Spatrick   ~VPInterleavedAccessInfo() {
257709467b48Spatrick     SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
257809467b48Spatrick     // Avoid releasing a pointer twice.
257909467b48Spatrick     for (auto &I : InterleaveGroupMap)
258009467b48Spatrick       DelSet.insert(I.second);
258109467b48Spatrick     for (auto *Ptr : DelSet)
258209467b48Spatrick       delete Ptr;
258309467b48Spatrick   }
258409467b48Spatrick 
258509467b48Spatrick   /// Get the interleave group that \p Instr belongs to.
258609467b48Spatrick   ///
258709467b48Spatrick   /// \returns nullptr if doesn't have such group.
258809467b48Spatrick   InterleaveGroup<VPInstruction> *
getInterleaveGroup(VPInstruction * Instr)258909467b48Spatrick   getInterleaveGroup(VPInstruction *Instr) const {
259073471bf0Spatrick     return InterleaveGroupMap.lookup(Instr);
259109467b48Spatrick   }
259209467b48Spatrick };
259309467b48Spatrick 
259409467b48Spatrick /// Class that maps (parts of) an existing VPlan to trees of combined
259509467b48Spatrick /// VPInstructions.
259609467b48Spatrick class VPlanSlp {
259709467b48Spatrick   enum class OpMode { Failed, Load, Opcode };
259809467b48Spatrick 
259909467b48Spatrick   /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
260009467b48Spatrick   /// DenseMap keys.
260109467b48Spatrick   struct BundleDenseMapInfo {
getEmptyKeyBundleDenseMapInfo260209467b48Spatrick     static SmallVector<VPValue *, 4> getEmptyKey() {
260309467b48Spatrick       return {reinterpret_cast<VPValue *>(-1)};
260409467b48Spatrick     }
260509467b48Spatrick 
getTombstoneKeyBundleDenseMapInfo260609467b48Spatrick     static SmallVector<VPValue *, 4> getTombstoneKey() {
260709467b48Spatrick       return {reinterpret_cast<VPValue *>(-2)};
260809467b48Spatrick     }
260909467b48Spatrick 
getHashValueBundleDenseMapInfo261009467b48Spatrick     static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
261109467b48Spatrick       return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
261209467b48Spatrick     }
261309467b48Spatrick 
isEqualBundleDenseMapInfo261409467b48Spatrick     static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
261509467b48Spatrick                         const SmallVector<VPValue *, 4> &RHS) {
261609467b48Spatrick       return LHS == RHS;
261709467b48Spatrick     }
261809467b48Spatrick   };
261909467b48Spatrick 
262009467b48Spatrick   /// Mapping of values in the original VPlan to a combined VPInstruction.
262109467b48Spatrick   DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
262209467b48Spatrick       BundleToCombined;
262309467b48Spatrick 
262409467b48Spatrick   VPInterleavedAccessInfo &IAI;
262509467b48Spatrick 
262609467b48Spatrick   /// Basic block to operate on. For now, only instructions in a single BB are
262709467b48Spatrick   /// considered.
262809467b48Spatrick   const VPBasicBlock &BB;
262909467b48Spatrick 
263009467b48Spatrick   /// Indicates whether we managed to combine all visited instructions or not.
263109467b48Spatrick   bool CompletelySLP = true;
263209467b48Spatrick 
263309467b48Spatrick   /// Width of the widest combined bundle in bits.
263409467b48Spatrick   unsigned WidestBundleBits = 0;
263509467b48Spatrick 
263609467b48Spatrick   using MultiNodeOpTy =
263709467b48Spatrick       typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
263809467b48Spatrick 
263909467b48Spatrick   // Input operand bundles for the current multi node. Each multi node operand
264009467b48Spatrick   // bundle contains values not matching the multi node's opcode. They will
264109467b48Spatrick   // be reordered in reorderMultiNodeOps, once we completed building a
264209467b48Spatrick   // multi node.
264309467b48Spatrick   SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
264409467b48Spatrick 
264509467b48Spatrick   /// Indicates whether we are building a multi node currently.
264609467b48Spatrick   bool MultiNodeActive = false;
264709467b48Spatrick 
264809467b48Spatrick   /// Check if we can vectorize Operands together.
264909467b48Spatrick   bool areVectorizable(ArrayRef<VPValue *> Operands) const;
265009467b48Spatrick 
265109467b48Spatrick   /// Add combined instruction \p New for the bundle \p Operands.
265209467b48Spatrick   void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
265309467b48Spatrick 
265409467b48Spatrick   /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
265509467b48Spatrick   VPInstruction *markFailed();
265609467b48Spatrick 
265709467b48Spatrick   /// Reorder operands in the multi node to maximize sequential memory access
265809467b48Spatrick   /// and commutative operations.
265909467b48Spatrick   SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
266009467b48Spatrick 
266109467b48Spatrick   /// Choose the best candidate to use for the lane after \p Last. The set of
266209467b48Spatrick   /// candidates to choose from are values with an opcode matching \p Last's
266309467b48Spatrick   /// or loads consecutive to \p Last.
266409467b48Spatrick   std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
266509467b48Spatrick                                        SmallPtrSetImpl<VPValue *> &Candidates,
266609467b48Spatrick                                        VPInterleavedAccessInfo &IAI);
266709467b48Spatrick 
266873471bf0Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
266909467b48Spatrick   /// Print bundle \p Values to dbgs().
267009467b48Spatrick   void dumpBundle(ArrayRef<VPValue *> Values);
267173471bf0Spatrick #endif
267209467b48Spatrick 
267309467b48Spatrick public:
VPlanSlp(VPInterleavedAccessInfo & IAI,VPBasicBlock & BB)267409467b48Spatrick   VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
267509467b48Spatrick 
267673471bf0Spatrick   ~VPlanSlp() = default;
267709467b48Spatrick 
267809467b48Spatrick   /// Tries to build an SLP tree rooted at \p Operands and returns a
267909467b48Spatrick   /// VPInstruction combining \p Operands, if they can be combined.
268009467b48Spatrick   VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
268109467b48Spatrick 
268209467b48Spatrick   /// Return the width of the widest combined bundle in bits.
getWidestBundleBits()268309467b48Spatrick   unsigned getWidestBundleBits() const { return WidestBundleBits; }
268409467b48Spatrick 
268509467b48Spatrick   /// Return true if all visited instruction can be combined.
isCompletelySLP()268609467b48Spatrick   bool isCompletelySLP() const { return CompletelySLP; }
268709467b48Spatrick };
2688*d415bd75Srobert 
2689*d415bd75Srobert namespace vputils {
2690*d415bd75Srobert 
2691*d415bd75Srobert /// Returns true if only the first lane of \p Def is used.
2692*d415bd75Srobert bool onlyFirstLaneUsed(VPValue *Def);
2693*d415bd75Srobert 
2694*d415bd75Srobert /// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
2695*d415bd75Srobert /// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
2696*d415bd75Srobert /// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
2697*d415bd75Srobert /// pre-header already contains a recipe expanding \p Expr, return it. If not,
2698*d415bd75Srobert /// create a new one.
2699*d415bd75Srobert VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
2700*d415bd75Srobert                                        ScalarEvolution &SE);
2701*d415bd75Srobert 
2702*d415bd75Srobert /// Returns true if \p VPV is uniform after vectorization.
isUniformAfterVectorization(VPValue * VPV)2703*d415bd75Srobert inline bool isUniformAfterVectorization(VPValue *VPV) {
2704*d415bd75Srobert   // A value defined outside the vector region must be uniform after
2705*d415bd75Srobert   // vectorization inside a vector region.
2706*d415bd75Srobert   if (VPV->isDefinedOutsideVectorRegions())
2707*d415bd75Srobert     return true;
2708*d415bd75Srobert   VPRecipeBase *Def = VPV->getDefiningRecipe();
2709*d415bd75Srobert   assert(Def && "Must have definition for value defined inside vector region");
2710*d415bd75Srobert   if (auto Rep = dyn_cast<VPReplicateRecipe>(Def))
2711*d415bd75Srobert     return Rep->isUniform();
2712*d415bd75Srobert   return false;
2713*d415bd75Srobert }
2714*d415bd75Srobert } // end namespace vputils
2715*d415bd75Srobert 
271609467b48Spatrick } // end namespace llvm
271709467b48Spatrick 
271809467b48Spatrick #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
2719