//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations of the Vectorization Plan base classes:
/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
///    VPBlockBase, together implementing a Hierarchical CFG;
/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
///    within VPBasicBlocks;
/// 3. VPInstruction, a concrete Recipe and VPUser modeling a single planned
///    instruction;
/// 4. The VPlan class holding a candidate for vectorization;
/// 5. The VPlanPrinter class providing a way to print a plan in dot format;
/// These are documented in docs/VectorizationPlan.rst.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H

#include "VPlanAnalysis.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Operator.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <string>

namespace llvm {

class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class IRBuilderBase;
class LoopInfo;
class raw_ostream;
class RecurrenceDescriptor;
class SCEV;
class Type;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
class VPReplicateRecipe;
class VPlanSlp;
class Value;
class LoopVersioning;

namespace Intrinsic {
typedef unsigned ID;
}

/// Returns a calculation for the total number of elements for a given \p VF.
/// For fixed width vectors this value is a constant, whereas for scalable
/// vectors it is an expression determined at runtime.
Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);

/// Return a value for Step multiplied by VF.
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
                       int64_t Step);
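
// Editor's sketch (names hypothetical): for a fixed VF the result folds to a
// constant, while for a scalable VF it is materialized at runtime via vscale:
//
//   // VF = 4           -> i64 (4 * Step), a constant.
//   // VF = vscale x 4  -> i64 (vscale * 4 * Step), computed at runtime.
//   Value *Stride = createStepForVF(Builder, Builder.getInt64Ty(), VF, Step);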

const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
                                Loop *CurLoop = nullptr);

/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,
/// [1, 16) = {1, 2, 4, 8}.
struct VFRange {
  // A power of 2.
  const ElementCount Start;

  // A power of 2. If End <= Start, the range is empty.
  ElementCount End;

  bool isEmpty() const {
    return End.getKnownMinValue() <= Start.getKnownMinValue();
  }

  VFRange(const ElementCount &Start, const ElementCount &End)
      : Start(Start), End(End) {
    assert(Start.isScalable() == End.isScalable() &&
           "Both Start and End should have the same scalable flag");
    assert(isPowerOf2_32(Start.getKnownMinValue()) &&
           "Expected Start to be a power of 2");
    assert(isPowerOf2_32(End.getKnownMinValue()) &&
           "Expected End to be a power of 2");
  }

  /// Iterator to iterate over vectorization factors in a VFRange.
  class iterator
      : public iterator_facade_base<iterator, std::forward_iterator_tag,
                                    ElementCount> {
    ElementCount VF;

  public:
    iterator(ElementCount VF) : VF(VF) {}

    bool operator==(const iterator &Other) const { return VF == Other.VF; }

    ElementCount operator*() const { return VF; }

    iterator &operator++() {
      VF *= 2;
      return *this;
    }
  };

  iterator begin() { return iterator(Start); }
  iterator end() {
    assert(isPowerOf2_32(End.getKnownMinValue()));
    return iterator(End);
  }
};
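
// Editor's sketch: iterating a VFRange visits each power-of-2 VF from Start up
// to, but excluding, End. The values below are illustrative only:
//
//   VFRange Range(ElementCount::getFixed(1), ElementCount::getFixed(16));
//   for (ElementCount VF : Range)
//     ...; // Visits VF = 1, 2, 4, 8.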

using VPlanPtr = std::unique_ptr<VPlan>;

/// In what follows, the term "input IR" refers to code that is fed into the
/// vectorizer whereas the term "output IR" refers to code that is generated by
/// the vectorizer.

/// VPLane provides a way to access lanes in both fixed width and scalable
/// vectors, where for the latter the lane index sometimes needs calculating
/// as a runtime expression.
class VPLane {
public:
  /// Kind describes how to interpret Lane.
  enum class Kind : uint8_t {
    /// For First, Lane is the index into the first N elements of a
    /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
    First,
    /// For ScalableLast, Lane is the offset from the start of the last
    /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
    /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
    /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
    ScalableLast
  };

private:
  /// in [0..VF)
  unsigned Lane;

  /// Indicates how the Lane should be interpreted, as described above.
  Kind LaneKind;

public:
  VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}

  static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }

  static VPLane getLastLaneForVF(const ElementCount &VF) {
    unsigned LaneOffset = VF.getKnownMinValue() - 1;
    Kind LaneKind;
    if (VF.isScalable())
      // In this case 'LaneOffset' refers to the offset from the start of the
      // last subvector with VF.getKnownMinValue() elements.
      LaneKind = VPLane::Kind::ScalableLast;
    else
      LaneKind = VPLane::Kind::First;
    return VPLane(LaneOffset, LaneKind);
  }

  /// Returns a compile-time known value for the lane index and asserts if the
  /// lane can only be calculated at runtime.
  unsigned getKnownLane() const {
    assert(LaneKind == Kind::First);
    return Lane;
  }

  /// Returns an expression describing the lane index that can be used at
  /// runtime.
  Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;

  /// Returns the Kind of lane offset.
  Kind getKind() const { return LaneKind; }

  /// Returns true if this is the first lane of the whole vector.
  bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }

  /// Maps the lane to a cache index based on \p VF.
  unsigned mapToCacheIndex(const ElementCount &VF) const {
    switch (LaneKind) {
    case VPLane::Kind::ScalableLast:
      assert(VF.isScalable() && Lane < VF.getKnownMinValue());
      return VF.getKnownMinValue() + Lane;
    default:
      assert(Lane < VF.getKnownMinValue());
      return Lane;
    }
  }

  /// Returns the maximum number of lanes that we are able to consider
  /// caching for \p VF.
  static unsigned getNumCachedLanes(const ElementCount &VF) {
    return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
  }
};
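
// Editor's sketch of the lane-caching arithmetic above, assuming
// VF = <vscale x 4 x i32> (known minimum 4):
//
//   VPLane Last = VPLane::getLastLaneForVF(VF); // Lane 3, Kind::ScalableLast.
//   Last.mapToCacheIndex(VF);                   // 4 + 3 == 7.
//   VPLane::getNumCachedLanes(VF);              // 2 * 4 == 8 slots: [0,4) for
//                                               // Kind::First, [4,8) for
//                                               // Kind::ScalableLast.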

/// VPIteration represents a single point in the iteration space of the output
/// (vectorized and/or unrolled) IR loop.
struct VPIteration {
  /// in [0..UF)
  unsigned Part;

  VPLane Lane;

  VPIteration(unsigned Part, unsigned Lane,
              VPLane::Kind Kind = VPLane::Kind::First)
      : Part(Part), Lane(Lane, Kind) {}

  VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}

  bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
};
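
// Editor's sketch: with UF = 2 and VF = 4 the iteration space has 2 * 4
// points, addressed as (Part, Lane):
//
//   VPIteration I(1, 2);  // Part 1, lane 2, interpreted as Kind::First.
//   I.isFirstIteration(); // false; only (Part 0, lane 0) is first.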

/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
  VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
                   DominatorTree *DT, IRBuilderBase &Builder,
                   InnerLoopVectorizer *ILV, VPlan *Plan, LLVMContext &Ctx)
      : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan),
        LVer(nullptr), TypeAnalysis(Ctx) {}

  /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
  ElementCount VF;
  unsigned UF;

  /// Hold the indices to generate specific scalar instructions. Null indicates
  /// that all instances are to be generated, using either scalar or vector
  /// instructions.
  std::optional<VPIteration> Instance;

  struct DataState {
    /// A type for vectorized values in the new loop. Each value from the
    /// original loop, when vectorized, is represented by UF vector values in
    /// the new unrolled loop, where UF is the unroll factor.
    typedef SmallVector<Value *, 2> PerPartValuesTy;

    DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;

    using ScalarsPerPartValuesTy = SmallVector<SmallVector<Value *, 4>, 2>;
    DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars;
  } Data;

  /// Get the generated Value for a given VPValue and a given Part. Note that
  /// as some Defs are still created by ILV and managed in its ValueMap, this
  /// method will delegate the call to ILV in such cases in order to provide
  /// callers a consistent API.
  /// \see set.
  Value *get(VPValue *Def, unsigned Part);

  /// Get the generated Value for a given VPValue and given Part and Lane.
  Value *get(VPValue *Def, const VPIteration &Instance);

  bool hasVectorValue(VPValue *Def, unsigned Part) {
    auto I = Data.PerPartOutput.find(Def);
    return I != Data.PerPartOutput.end() && Part < I->second.size() &&
           I->second[Part];
  }

  bool hasScalarValue(VPValue *Def, VPIteration Instance) {
    auto I = Data.PerPartScalars.find(Def);
    if (I == Data.PerPartScalars.end())
      return false;
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    return Instance.Part < I->second.size() &&
           CacheIdx < I->second[Instance.Part].size() &&
           I->second[Instance.Part][CacheIdx];
  }

  /// Set the generated Value for a given VPValue and a given Part.
  void set(VPValue *Def, Value *V, unsigned Part) {
    if (!Data.PerPartOutput.count(Def)) {
      DataState::PerPartValuesTy Entry(UF);
      Data.PerPartOutput[Def] = Entry;
    }
    Data.PerPartOutput[Def][Part] = V;
  }
  /// Reset an existing vector value for \p Def and a given \p Part.
  void reset(VPValue *Def, Value *V, unsigned Part) {
    auto Iter = Data.PerPartOutput.find(Def);
    assert(Iter != Data.PerPartOutput.end() &&
           "need to overwrite existing value");
    Iter->second[Part] = V;
  }

  /// Set the generated scalar \p V for \p Def and the given \p Instance.
  void set(VPValue *Def, Value *V, const VPIteration &Instance) {
    auto Iter = Data.PerPartScalars.insert({Def, {}});
    auto &PerPartVec = Iter.first->second;
    while (PerPartVec.size() <= Instance.Part)
      PerPartVec.emplace_back();
    auto &Scalars = PerPartVec[Instance.Part];
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    while (Scalars.size() <= CacheIdx)
      Scalars.push_back(nullptr);
    assert(!Scalars[CacheIdx] && "should not overwrite existing value");
    Scalars[CacheIdx] = V;
  }

  /// Reset an existing scalar value for \p Def and a given \p Instance.
  void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
    auto Iter = Data.PerPartScalars.find(Def);
    assert(Iter != Data.PerPartScalars.end() &&
           "need to overwrite existing value");
    assert(Instance.Part < Iter->second.size() &&
           "need to overwrite existing value");
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    assert(CacheIdx < Iter->second[Instance.Part].size() &&
           "need to overwrite existing value");
    Iter->second[Instance.Part][CacheIdx] = V;
  }

  /// Add additional metadata to \p To that was not present on \p Orig.
  ///
  /// Currently this is used to add the noalias annotations based on the
  /// inserted memchecks.  Use this for instructions that are *cloned* into the
  /// vector loop.
  void addNewMetadata(Instruction *To, const Instruction *Orig);

  /// Add metadata from one instruction to another.
  ///
  /// This includes both the original MDs from \p From and additional ones (\see
  /// addNewMetadata).  Use this for *newly created* instructions in the vector
  /// loop.
  void addMetadata(Instruction *To, Instruction *From);

  /// Similar to the previous function but it adds the metadata to a
  /// vector of instructions.
  void addMetadata(ArrayRef<Value *> To, Instruction *From);

  /// Set the debug location in the builder using the debug location \p DL.
  void setDebugLocFrom(DebugLoc DL);

  /// Construct the vector value of a scalarized value \p V one lane at a time.
  void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance);

  /// Hold state information used when constructing the CFG of the output IR,
  /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
  struct CFGState {
    /// The previous VPBasicBlock visited. Initially set to null.
    VPBasicBlock *PrevVPBB = nullptr;

    /// The previous IR BasicBlock created or used. Initially set to the new
    /// header BasicBlock.
    BasicBlock *PrevBB = nullptr;

    /// The last IR BasicBlock in the output IR. Set to the exit block of the
    /// vector loop.
    BasicBlock *ExitBB = nullptr;

    /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
    /// of replication, maps the BasicBlock of the last replica created.
    SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;

    CFGState() = default;

    /// Returns the BasicBlock* mapped to the pre-header of the loop region
    /// containing \p R.
    BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
  } CFG;

  /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
  LoopInfo *LI;

  /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
  DominatorTree *DT;

  /// Hold a reference to the IRBuilder used to generate output IR code.
  IRBuilderBase &Builder;

  VPValue2ValueTy VPValue2Value;

  /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
  Value *CanonicalIV = nullptr;

  /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
  InnerLoopVectorizer *ILV;

  /// Pointer to the VPlan for which code is generated.
  VPlan *Plan;

  /// The loop object for the current parent region, or nullptr.
  Loop *CurrentVectorLoop = nullptr;

  /// LoopVersioning.  It's only set up (non-null) if memchecks were
  /// used.
  ///
  /// This is currently only used to add no-alias metadata based on the
  /// memchecks.  The actual versioning is performed manually.
  LoopVersioning *LVer = nullptr;

  /// Map SCEVs to their expanded values. Populated when executing
  /// VPExpandSCEVRecipes.
  DenseMap<const SCEV *, Value *> ExpandedSCEVs;

  /// VPlan-based type analysis.
  VPTypeAnalysis TypeAnalysis;
};
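
// Editor's sketch of the per-part caches above (Def, V0, V1, S hypothetical):
// each VPValue maps to UF generated vector values, one per unrolled part, plus
// optional per-lane scalars keyed by VPLane::mapToCacheIndex:
//
//   State.set(Def, V0, /*Part=*/0);
//   State.set(Def, V1, /*Part=*/1);
//   Value *X = State.get(Def, 1);         // Returns V1.
//   State.set(Def, S, VPIteration(0, 3)); // Scalar for part 0, lane 3.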

/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
class VPBlockBase {
  friend class VPBlockUtils;

  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).

  /// An optional name for the block.
  std::string Name;

  /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
  /// it is a topmost VPBlockBase.
  VPRegionBlock *Parent = nullptr;

  /// List of predecessor blocks.
  SmallVector<VPBlockBase *, 1> Predecessors;

  /// List of successor blocks.
  SmallVector<VPBlockBase *, 1> Successors;

  /// VPlan containing the block. Can only be set on the entry block of the
  /// plan.
  VPlan *Plan = nullptr;

  /// Add \p Successor as the last successor to this block.
  void appendSuccessor(VPBlockBase *Successor) {
    assert(Successor && "Cannot add nullptr successor!");
    Successors.push_back(Successor);
  }

  /// Add \p Predecessor as the last predecessor to this block.
  void appendPredecessor(VPBlockBase *Predecessor) {
    assert(Predecessor && "Cannot add nullptr predecessor!");
    Predecessors.push_back(Predecessor);
  }

  /// Remove \p Predecessor from the predecessors of this block.
  void removePredecessor(VPBlockBase *Predecessor) {
    auto Pos = find(Predecessors, Predecessor);
    assert(Pos && "Predecessor does not exist");
    Predecessors.erase(Pos);
  }

  /// Remove \p Successor from the successors of this block.
  void removeSuccessor(VPBlockBase *Successor) {
    auto Pos = find(Successors, Successor);
    assert(Pos && "Successor does not exist");
    Successors.erase(Pos);
  }

protected:
  VPBlockBase(const unsigned char SC, const std::string &N)
      : SubclassID(SC), Name(N) {}

public:
  /// An enumeration for keeping track of the concrete subclasses of
  /// VPBlockBase that are actually instantiated. Values of this enumeration
  /// are kept in the SubclassID field of the VPBlockBase objects. They are
  /// used for concrete type identification.
  using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };

  using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;

  virtual ~VPBlockBase() = default;

  const std::string &getName() const { return Name; }

  void setName(const Twine &newName) { Name = newName.str(); }

  /// \return an ID for the concrete type of this object.
  /// This is used to implement the classof checks. This should not be used
  /// for any other purpose, as the values may change as LLVM evolves.
  unsigned getVPBlockID() const { return SubclassID; }

  VPRegionBlock *getParent() { return Parent; }
  const VPRegionBlock *getParent() const { return Parent; }

  /// \return A pointer to the plan containing the current block.
  VPlan *getPlan();
  const VPlan *getPlan() const;

  /// Sets the pointer of the plan containing the block. The block must be the
  /// entry block into the VPlan.
  void setPlan(VPlan *ParentPlan);

  void setParent(VPRegionBlock *P) { Parent = P; }

  /// \return the VPBasicBlock that is the entry of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getEntryBasicBlock() const;
  VPBasicBlock *getEntryBasicBlock();

  /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getExitingBasicBlock() const;
  VPBasicBlock *getExitingBasicBlock();

  const VPBlocksTy &getSuccessors() const { return Successors; }
  VPBlocksTy &getSuccessors() { return Successors; }

  iterator_range<VPBlockBase **> successors() { return Successors; }

  const VPBlocksTy &getPredecessors() const { return Predecessors; }
  VPBlocksTy &getPredecessors() { return Predecessors; }

  /// \return the successor of this VPBlockBase if it has a single successor.
  /// Otherwise return a null pointer.
  VPBlockBase *getSingleSuccessor() const {
    return (Successors.size() == 1 ? *Successors.begin() : nullptr);
  }

  /// \return the predecessor of this VPBlockBase if it has a single
  /// predecessor. Otherwise return a null pointer.
  VPBlockBase *getSinglePredecessor() const {
    return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
  }

  size_t getNumSuccessors() const { return Successors.size(); }
  size_t getNumPredecessors() const { return Predecessors.size(); }

  /// An Enclosing Block of a block B is any block containing B, including B
  /// itself. \return the closest enclosing block starting from "this", which
  /// has successors. \return the root enclosing block if all enclosing blocks
  /// have no successors.
  VPBlockBase *getEnclosingBlockWithSuccessors();

  /// \return the closest enclosing block starting from "this", which has
  /// predecessors. \return the root enclosing block if all enclosing blocks
  /// have no predecessors.
  VPBlockBase *getEnclosingBlockWithPredecessors();

  /// \return the successors either attached directly to this VPBlockBase or, if
  /// this VPBlockBase is the exit block of a VPRegionBlock and has no
  /// successors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has successors and return them. If no such
  /// VPRegionBlock exists, return the (empty) successors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalSuccessors() {
    return getEnclosingBlockWithSuccessors()->getSuccessors();
  }

  /// \return the hierarchical successor of this VPBlockBase if it has a single
  /// hierarchical successor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalSuccessor() {
    return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
  }

  /// \return the predecessors either attached directly to this VPBlockBase or,
  /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
  /// predecessors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has predecessors and return them. If no such
  /// VPRegionBlock exists, return the (empty) predecessors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalPredecessors() {
    return getEnclosingBlockWithPredecessors()->getPredecessors();
  }

  /// \return the hierarchical predecessor of this VPBlockBase if it has a
  /// single hierarchical predecessor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalPredecessor() {
    return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
  }

  /// Set a given VPBlockBase \p Successor as the single successor of this
  /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
  /// This VPBlockBase must have no successors.
  void setOneSuccessor(VPBlockBase *Successor) {
    assert(Successors.empty() && "Setting one successor when others exist.");
    assert(Successor->getParent() == getParent() &&
           "connected blocks must have the same parent");
    appendSuccessor(Successor);
  }

  /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
  /// successors of this VPBlockBase. This VPBlockBase is not added as
  /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
  /// successors.
  void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
    assert(Successors.empty() && "Setting two successors when others exist.");
    appendSuccessor(IfTrue);
    appendSuccessor(IfFalse);
  }

  /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
  /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
  /// as successor of any VPBasicBlock in \p NewPreds.
  void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
    assert(Predecessors.empty() && "Block predecessors already set.");
    for (auto *Pred : NewPreds)
      appendPredecessor(Pred);
  }

  /// Remove all the predecessors of this block.
  void clearPredecessors() { Predecessors.clear(); }

  /// Remove all the successors of this block.
  void clearSuccessors() { Successors.clear(); }

  /// The method which generates the output IR that corresponds to this
  /// VPBlockBase, thereby "executing" the VPlan.
  virtual void execute(VPTransformState *State) = 0;

  /// Delete all blocks reachable from a given VPBlockBase, inclusive.
  static void deleteCFG(VPBlockBase *Entry);

  /// Return true if it is legal to hoist instructions into this block.
  bool isLegalToHoistInto() {
    // There are currently no constraints that prevent an instruction from
    // being hoisted into a VPBlockBase.
    return true;
  }

  /// Replace all operands of VPUsers in the block with \p NewValue and also
  /// replaces all uses of VPValues defined in the block with NewValue.
  virtual void dropAllReferences(VPValue *NewValue) = 0;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printAsOperand(raw_ostream &OS, bool PrintType) const {
    OS << getName();
  }

  /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
  /// with \p Indent. \p SlotTracker is used to print unnamed VPValues using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  virtual void print(raw_ostream &O, const Twine &Indent,
                     VPSlotTracker &SlotTracker) const = 0;

  /// Print plain-text dump of this VPBlockBase to \p O.
  void print(raw_ostream &O) const {
    VPSlotTracker SlotTracker(getPlan());
    print(O, "", SlotTracker);
  }

  /// Print the successors of this block to \p O, prefixing all lines with \p
  /// Indent.
  void printSuccessors(raw_ostream &O, const Twine &Indent) const;

  /// Dump this VPBlockBase to dbgs().
  LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
#endif
};
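
// Editor's sketch: successor and predecessor edges are stored separately and
// none of the setters above update the other side, so wiring a conditional
// branch by hand takes both directions (blocks hypothetical):
//
//   Cond->setTwoSuccessors(Then, Else);
//   Then->setPredecessors({Cond});
//   Else->setPredecessors({Cond});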

/// A value that is used outside the VPlan. The operand of the user needs to be
/// added to the associated LCSSA phi node.
class VPLiveOut : public VPUser {
  PHINode *Phi;

public:
  VPLiveOut(PHINode *Phi, VPValue *Op)
      : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}

  static inline bool classof(const VPUser *U) {
    return U->getVPUserID() == VPUser::VPUserID::LiveOut;
  }

  /// Fixup the wrapped LCSSA phi node in the unique exit block.  This simply
  /// means we need to add the appropriate incoming value from the middle
  /// block as exiting edges from the scalar epilogue loop (if present) are
  /// already in place, and we exit the vector loop exclusively to the middle
  /// block.
  void fixPhi(VPlan &Plan, VPTransformState &State);

  /// Returns true if the VPLiveOut uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  PHINode *getPhi() const { return Phi; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPLiveOut to \p O.
  void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
#endif
};

/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// VPRecipeBases that also inherit from VPValue must make sure to inherit from
/// VPRecipeBase before VPValue.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
                     public VPDef,
                     public VPUser {
  friend VPBasicBlock;
  friend class VPBlockUtils;

  /// Each VPRecipe belongs to a single VPBasicBlock.
  VPBasicBlock *Parent = nullptr;

  /// The debug location for the recipe.
  DebugLoc DL;

public:
  VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
               DebugLoc DL = {})
      : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}

  template <typename IterT>
  VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands,
               DebugLoc DL = {})
      : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}
  virtual ~VPRecipeBase() = default;

  /// \return the VPBasicBlock which this VPRecipe belongs to.
  VPBasicBlock *getParent() { return Parent; }
  const VPBasicBlock *getParent() const { return Parent; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRecipe, thereby "executing" the VPlan.
  virtual void execute(VPTransformState &State) = 0;

  /// Insert an unlinked recipe into a basic block immediately before
  /// the specified recipe.
  void insertBefore(VPRecipeBase *InsertPos);
  /// Insert an unlinked recipe into \p BB immediately before the insertion
  /// point \p IP.
  void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);

  /// Insert an unlinked Recipe into a basic block immediately after
  /// the specified Recipe.
  void insertAfter(VPRecipeBase *InsertPos);

  /// Unlink this recipe from its current VPBasicBlock and insert it into
  /// the VPBasicBlock that MovePos lives in, right after MovePos.
  void moveAfter(VPRecipeBase *MovePos);

  /// Unlink this recipe and insert into BB before I.
  ///
  /// \pre I is a valid iterator into BB.
  void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);

  /// This method unlinks 'this' from the containing basic block, but does not
  /// delete it.
  void removeFromParent();

  /// This method unlinks 'this' from the containing basic block and deletes it.
  ///
  /// \returns an iterator pointing to the element after the erased one
  iplist<VPRecipeBase>::iterator eraseFromParent();

  /// Returns the underlying instruction if the recipe is a VPValue, or nullptr
  /// otherwise.
  Instruction *getUnderlyingInstr() {
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
  }
  const Instruction *getUnderlyingInstr() const {
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    // All VPDefs are also VPRecipeBases.
    return true;
  }

  static inline bool classof(const VPUser *U) {
    return U->getVPUserID() == VPUser::VPUserID::Recipe;
  }

  /// Returns true if the recipe may have side-effects.
  bool mayHaveSideEffects() const;

  /// Returns true for PHI-like recipes.
  bool isPhi() const {
    return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
  }

  /// Returns true if the recipe may read from memory.
  bool mayReadFromMemory() const;

  /// Returns true if the recipe may write to memory.
  bool mayWriteToMemory() const;

  /// Returns true if the recipe may read from or write to memory.
  bool mayReadOrWriteMemory() const {
    return mayReadFromMemory() || mayWriteToMemory();
  }

  /// Returns the debug location of the recipe.
  DebugLoc getDebugLoc() const { return DL; }
};
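
// Editor's sketch of the list-manipulation API above (R, Other hypothetical):
//
//   R->moveAfter(Other);            // Unlink R, re-insert right after Other.
//   auto It = R->eraseFromParent(); // Unlink and delete R; It points at the
//                                   // recipe that followed it.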

// Helper macro to define common classof implementations for recipes.
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPDef *D) {                                 \
    return D->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    auto *R = V->getDefiningRecipe();                                          \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    auto *R = dyn_cast<VPRecipeBase>(U);                                       \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPDefID() == VPDefID;                                         \
  }
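
// Editor's note: each concrete recipe instantiates the macro with its own ID,
// e.g. VP_CLASSOF_IMPL(VPDef::VPWidenSC) inside VPWidenRecipe, so the usual
// queries work from any of the four base types:
//
//   if (auto *W = dyn_cast<VPWidenRecipe>(D)) // D is a VPDef *, VPValue *,
//     ...;                                    // VPUser *, or VPRecipeBase *.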

/// Class to record LLVM IR flags for a recipe along with the recipe itself.
class VPRecipeWithIRFlags : public VPRecipeBase {
  enum class OperationType : unsigned char {
    Cmp,
    OverflowingBinOp,
    DisjointOp,
    PossiblyExactOp,
    GEPOp,
    FPMathOp,
    NonNegOp,
    Other
  };

public:
  struct WrapFlagsTy {
    char HasNUW : 1;
    char HasNSW : 1;

    WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
  };

private:
  struct DisjointFlagsTy {
    char IsDisjoint : 1;
  };
  struct ExactFlagsTy {
    char IsExact : 1;
  };
  struct GEPFlagsTy {
    char IsInBounds : 1;
  };
  struct NonNegFlagsTy {
    char NonNeg : 1;
  };
  struct FastMathFlagsTy {
    char AllowReassoc : 1;
    char NoNaNs : 1;
    char NoInfs : 1;
    char NoSignedZeros : 1;
    char AllowReciprocal : 1;
    char AllowContract : 1;
    char ApproxFunc : 1;

    FastMathFlagsTy(const FastMathFlags &FMF);
  };

  OperationType OpType;

  union {
    CmpInst::Predicate CmpPredicate;
    WrapFlagsTy WrapFlags;
    DisjointFlagsTy DisjointFlags;
    ExactFlagsTy ExactFlags;
    GEPFlagsTy GEPFlags;
    NonNegFlagsTy NonNegFlags;
    FastMathFlagsTy FMFs;
    unsigned AllFlags;
  };

public:
  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL) {
    OpType = OperationType::Other;
    AllFlags = 0;
  }

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
      : VPRecipeWithIRFlags(SC, Operands, I.getDebugLoc()) {
    if (auto *Op = dyn_cast<CmpInst>(&I)) {
      OpType = OperationType::Cmp;
      CmpPredicate = Op->getPredicate();
    } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
      OpType = OperationType::DisjointOp;
      DisjointFlags.IsDisjoint = Op->isDisjoint();
    } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
      OpType = OperationType::OverflowingBinOp;
      WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
    } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
      OpType = OperationType::PossiblyExactOp;
      ExactFlags.IsExact = Op->isExact();
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
      OpType = OperationType::GEPOp;
      GEPFlags.IsInBounds = GEP->isInBounds();
    } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
      OpType = OperationType::NonNegOp;
      NonNegFlags.NonNeg = PNNI->hasNonNeg();
    } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
      OpType = OperationType::FPMathOp;
      FMFs = Op->getFastMathFlags();
    }
  }

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      CmpInst::Predicate Pred, DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), OpType(OperationType::Cmp),
        CmpPredicate(Pred) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      WrapFlagsTy WrapFlags, DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), OpType(OperationType::OverflowingBinOp),
        WrapFlags(WrapFlags) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      FastMathFlags FMFs, DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), OpType(OperationType::FPMathOp),
        FMFs(FMFs) {}

  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
           R->getVPDefID() == VPRecipeBase::VPReplicateSC;
  }

  /// Drop all poison-generating flags.
  void dropPoisonGeneratingFlags() {
    // NOTE: This needs to be kept in-sync with
    // Instruction::dropPoisonGeneratingFlags.
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      WrapFlags.HasNUW = false;
      WrapFlags.HasNSW = false;
      break;
    case OperationType::DisjointOp:
      DisjointFlags.IsDisjoint = false;
      break;
    case OperationType::PossiblyExactOp:
      ExactFlags.IsExact = false;
      break;
    case OperationType::GEPOp:
      GEPFlags.IsInBounds = false;
      break;
    case OperationType::FPMathOp:
      FMFs.NoNaNs = false;
      FMFs.NoInfs = false;
      break;
    case OperationType::NonNegOp:
      NonNegFlags.NonNeg = false;
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }

  /// Set the IR flags for \p I.
  void setFlags(Instruction *I) const {
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
      I->setHasNoSignedWrap(WrapFlags.HasNSW);
      break;
    case OperationType::DisjointOp:
      cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
      break;
    case OperationType::PossiblyExactOp:
      I->setIsExact(ExactFlags.IsExact);
      break;
    case OperationType::GEPOp:
      cast<GetElementPtrInst>(I)->setIsInBounds(GEPFlags.IsInBounds);
      break;
    case OperationType::FPMathOp:
      I->setHasAllowReassoc(FMFs.AllowReassoc);
      I->setHasNoNaNs(FMFs.NoNaNs);
      I->setHasNoInfs(FMFs.NoInfs);
      I->setHasNoSignedZeros(FMFs.NoSignedZeros);
      I->setHasAllowReciprocal(FMFs.AllowReciprocal);
      I->setHasAllowContract(FMFs.AllowContract);
      I->setHasApproxFunc(FMFs.ApproxFunc);
      break;
    case OperationType::NonNegOp:
      I->setNonNeg(NonNegFlags.NonNeg);
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }

  CmpInst::Predicate getPredicate() const {
    assert(OpType == OperationType::Cmp &&
           "recipe doesn't have a compare predicate");
    return CmpPredicate;
  }

  bool isInBounds() const {
    assert(OpType == OperationType::GEPOp &&
           "recipe doesn't have inbounds flag");
    return GEPFlags.IsInBounds;
  }

  /// Returns true if the recipe has fast-math flags.
  bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }

  FastMathFlags getFastMathFlags() const;

  bool hasNoUnsignedWrap() const {
    assert(OpType == OperationType::OverflowingBinOp &&
           "recipe doesn't have a NUW flag");
    return WrapFlags.HasNUW;
  }

  bool hasNoSignedWrap() const {
    assert(OpType == OperationType::OverflowingBinOp &&
           "recipe doesn't have a NSW flag");
    return WrapFlags.HasNSW;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printFlags(raw_ostream &O) const;
#endif
};
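
// Editor's sketch: flags are captured from the ingredient at construction and
// re-applied to generated IR via setFlags(); dropPoisonGeneratingFlags()
// clears only the poison-relevant subset. Assuming I is an 'add nuw nsw':
//
//   VPWidenRecipe R(*I, Operands); // Records OverflowingBinOp with NUW + NSW.
//   R.dropPoisonGeneratingFlags(); // Both wrap flags cleared.
//   R.setFlags(NewAdd);            // NewAdd ends up with neither nuw nor nsw.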

/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While, as with any Recipe, it may generate a sequence of IR instructions
/// when executed, these instructions always form a single-def expression, as
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPRecipeWithIRFlags, public VPValue {
  friend class VPlanSlp;

public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
  enum {
    FirstOrderRecurrenceSplice =
        Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
                                      // values of a first-order recurrence.
    Not,
    SLPLoad,
    SLPStore,
    ActiveLaneMask,
    CalculateTripCountMinusVF,
    // Increment the canonical IV separately for each unrolled part.
    CanonicalIVIncrementForPart,
    BranchOnCount,
    BranchOnCond
  };

private:
  typedef unsigned char OpcodeTy;
  OpcodeTy Opcode;

  /// An optional name that can be used for the generated IR instruction.
  const std::string Name;

  /// Utility method serving execute(): generates a single instance of the
  /// modeled instruction. \returns the generated value for \p Part.
  /// In some cases an existing value is returned rather than a generated
  /// one.
  Value *generateInstruction(VPTransformState &State, unsigned Part);

#if !defined(NDEBUG)
  /// Return true if the VPInstruction is a floating point math operation, i.e.
  /// has fast-math flags.
  bool isFPMathOp() const;
#endif

protected:
  void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }

public:
  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL,
                const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
        VPValue(this), Opcode(Opcode), Name(Name.str()) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                DebugLoc DL = {}, const Twine &Name = "")
      : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name) {}

  VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
                VPValue *B, DebugLoc DL = {}, const Twine &Name = "");

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
        VPValue(this), Opcode(Opcode), Name(Name.str()) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");

  VP_CLASSOF_IMPL(VPDef::VPInstructionSC)

  unsigned getOpcode() const { return Opcode; }

  /// Generate the instruction.
  /// TODO: We currently execute only per-part unless a specific instance is
  /// provided.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPInstruction to \p O.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// Print the VPInstruction to dbgs() (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  /// Return true if this instruction may modify memory.
  bool mayWriteToMemory() const {
    // TODO: we can use attributes of the called function to rule out memory
    //       modifications.
    return Opcode == Instruction::Store || Opcode == Instruction::Call ||
           Opcode == Instruction::Invoke || Opcode == SLPStore;
  }

  bool hasResult() const {
    // CallInst may or may not have a result, depending on the called function.
    // Conservatively assume calls have results for now.
    switch (getOpcode()) {
    case Instruction::Ret:
    case Instruction::Br:
    case Instruction::Store:
    case Instruction::Switch:
    case Instruction::IndirectBr:
    case Instruction::Resume:
    case Instruction::CatchRet:
    case Instruction::Unreachable:
    case Instruction::Fence:
    case Instruction::AtomicRMW:
    case VPInstruction::BranchOnCond:
    case VPInstruction::BranchOnCount:
      return false;
    default:
      return true;
    }
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    if (getOperand(0) != Op)
      return false;
    switch (getOpcode()) {
    default:
      return false;
    case VPInstruction::ActiveLaneMask:
    case VPInstruction::CalculateTripCountMinusVF:
    case VPInstruction::CanonicalIVIncrementForPart:
    case VPInstruction::BranchOnCount:
      return true;
    };
    llvm_unreachable("switch should return");
  }

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool onlyFirstPartUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    if (getOperand(0) != Op)
      return false;
    switch (getOpcode()) {
    default:
      return false;
    case VPInstruction::BranchOnCount:
      return true;
    };
    llvm_unreachable("switch should return");
  }
};
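
// Editor's sketch: building a VPlan-level negation of a condition (Cond and DL
// hypothetical):
//
//   auto *NotCond = new VPInstruction(VPInstruction::Not, {Cond}, DL, "not");
//   NotCond->hasResult(); // true: Not is not in the result-less opcode list.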

/// VPWidenRecipe is a recipe for producing a widened (vector) copy of its
/// ingredient. This recipe covers most of the traditional vectorization cases
/// where each ingredient transforms into a vectorized version of itself.
class VPWidenRecipe : public VPRecipeWithIRFlags, public VPValue {
  unsigned Opcode;

public:
  template <typename IterT>
  VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
      : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPValue(this, &I),
        Opcode(I.getOpcode()) {}

  ~VPWidenRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenSC)

  /// Produce widened copies of all Ingredients.
  void execute(VPTransformState &State) override;

  unsigned getOpcode() const { return Opcode; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPValue {
  /// Cast instruction opcode.
  Instruction::CastOps Opcode;

  /// Result type for the cast.
  Type *ResultTy;

public:
  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
                    CastInst &UI)
      : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), VPValue(this, &UI),
        Opcode(Opcode), ResultTy(ResultTy) {
    assert(UI.getOpcode() == Opcode &&
           "opcode of underlying cast doesn't match");
    assert(UI.getType() == ResultTy &&
           "result type of underlying cast doesn't match");
  }

  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
      : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), VPValue(this, nullptr),
        Opcode(Opcode), ResultTy(ResultTy) {}

  ~VPWidenCastRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)

  /// Produce widened copies of the cast.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  Instruction::CastOps getOpcode() const { return Opcode; }

  /// Returns the result type of the cast.
  Type *getResultType() const { return ResultTy; }
};

/// A recipe for widening Call instructions.
class VPWidenCallRecipe : public VPRecipeBase, public VPValue {
  /// ID of the vector intrinsic to call when widening the call. If set to
  /// Intrinsic::not_intrinsic, a library call will be used instead.
  Intrinsic::ID VectorIntrinsicID;
  /// If this recipe represents a library call, Variant stores a pointer to
  /// the chosen function. There is a 1:1 mapping between a given VF and the
  /// chosen vectorized variant, so there will be a different vplan for each
  /// VF with a valid variant.
  Function *Variant;

public:
  template <typename IterT>
  VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments,
                    Intrinsic::ID VectorIntrinsicID,
                    Function *Variant = nullptr)
      : VPRecipeBase(VPDef::VPWidenCallSC, CallArguments), VPValue(this, &I),
        VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {}

  ~VPWidenCallRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)

  /// Produce a widened version of the call instruction.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for widening select instructions.
struct VPWidenSelectRecipe : public VPRecipeBase, public VPValue {
  template <typename IterT>
  VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)
      : VPRecipeBase(VPDef::VPWidenSelectSC, Operands, I.getDebugLoc()),
        VPValue(this, &I) {}

  ~VPWidenSelectRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)

  /// Produce a widened version of the select instruction.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getCond() const {
    return getOperand(0);
  }

  bool isInvariantCond() const {
    return getCond()->isDefinedOutsideVectorRegions();
  }
};

/// A recipe for handling GEP instructions.
class VPWidenGEPRecipe : public VPRecipeWithIRFlags, public VPValue {
  bool isPointerLoopInvariant() const {
    return getOperand(0)->isDefinedOutsideVectorRegions();
  }

  bool isIndexLoopInvariant(unsigned I) const {
    return getOperand(I + 1)->isDefinedOutsideVectorRegions();
  }

  bool areAllOperandsInvariant() const {
    return all_of(operands(), [](VPValue *Op) {
      return Op->isDefinedOutsideVectorRegions();
    });
  }

public:
  template <typename IterT>
  VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
      : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP),
        VPValue(this, GEP) {}

  ~VPWidenGEPRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)

  /// Generate the gep nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A pure virtual base class for all recipes modeling header phis, including
/// phis for first order recurrences, pointer inductions and reductions. The
/// start value is the first operand of the recipe and the incoming value from
/// the backedge is the second operand.
///
/// Inductions are modeled using the following sub-classes:
///  * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
///    starting at a specified value (zero for the main vector loop, the resume
///    value for the epilogue vector loop) and stepping by 1. The induction
///    controls exiting of the vector loop by comparing against the vector trip
///    count. Produces a single scalar PHI for the induction value per
///    iteration.
///  * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
///    floating point inductions with arbitrary start and step values. Produces
///    a vector PHI per-part.
///  * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
///    value of an IV with different start and step values. Produces a single
///    scalar value per iteration.
///  * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
///    canonical or derived induction.
///  * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
///    pointer induction. Produces either a vector PHI per-part or scalar values
///    per-lane based on the canonical induction.
class VPHeaderPHIRecipe : public VPRecipeBase, public VPValue {
protected:
  VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
                    VPValue *Start = nullptr, DebugLoc DL = {})
      : VPRecipeBase(VPDefID, {}, DL), VPValue(this, UnderlyingInstr) {
    if (Start)
      addOperand(Start);
  }

public:
  ~VPHeaderPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *B) {
    return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
           B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
  }
  static inline bool classof(const VPValue *V) {
    auto *B = V->getDefiningRecipe();
    return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
           B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
  }

  /// Generate the phi nodes.
  void execute(VPTransformState &State) override = 0;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override = 0;
#endif

  /// Returns the start value of the phi, if one is set.
  VPValue *getStartValue() {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }
  VPValue *getStartValue() const {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }

  /// Update the start value of the recipe.
  void setStartValue(VPValue *V) { setOperand(0, V); }

  /// Returns the incoming value from the loop backedge.
  virtual VPValue *getBackedgeValue() {
    return getOperand(1);
  }

  /// Returns the backedge value as a recipe. The backedge value is guaranteed
  /// to be a recipe.
  virtual VPRecipeBase &getBackedgeRecipe() {
    return *getBackedgeValue()->getDefiningRecipe();
  }
};
1437 
1438 /// A recipe for handling phi nodes of integer and floating-point inductions,
1439 /// producing their vector values.
1440 class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
1441   PHINode *IV;
1442   TruncInst *Trunc;
1443   const InductionDescriptor &IndDesc;
1444 
1445 public:
1446   VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
1447                                 const InductionDescriptor &IndDesc)
1448       : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
1449         Trunc(nullptr), IndDesc(IndDesc) {
1450     addOperand(Step);
1451   }
1452 
1453   VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
1454                                 const InductionDescriptor &IndDesc,
1455                                 TruncInst *Trunc)
1456       : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
1457         IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
1458     addOperand(Step);
1459   }
1460 
1461   ~VPWidenIntOrFpInductionRecipe() override = default;
1462 
1463   VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
1464 
1465   /// Generate the vectorized and scalarized versions of the phi node as
1466   /// needed by their users.
1467   void execute(VPTransformState &State) override;
1468 
1469 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1470   /// Print the recipe.
1471   void print(raw_ostream &O, const Twine &Indent,
1472              VPSlotTracker &SlotTracker) const override;
1473 #endif
1474 
1475   VPValue *getBackedgeValue() override {
1476     // TODO: All operands of base recipe must exist and be at same index in
1477     // derived recipe.
1478     llvm_unreachable(
1479         "VPWidenIntOrFpInductionRecipe generates its own backedge value");
1480   }
1481 
1482   VPRecipeBase &getBackedgeRecipe() override {
1483     // TODO: All operands of base recipe must exist and be at same index in
1484     // derived recipe.
1485     llvm_unreachable(
1486         "VPWidenIntOrFpInductionRecipe generates its own backedge value");
1487   }
1488 
1489   /// Returns the step value of the induction.
1490   VPValue *getStepValue() { return getOperand(1); }
1491   const VPValue *getStepValue() const { return getOperand(1); }
1492 
1493   /// Returns the first defined value as a TruncInst, if it is one, and
1494   /// nullptr otherwise.
1495   TruncInst *getTruncInst() { return Trunc; }
1496   const TruncInst *getTruncInst() const { return Trunc; }
1497 
1498   PHINode *getPHINode() { return IV; }
1499 
1500   /// Returns the induction descriptor for the recipe.
1501   const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1502 
1503   /// Returns true if the induction is canonical, i.e. starting at 0 and
1504   /// incremented by UF * VF (= the original IV is incremented by 1).
1505   bool isCanonical() const;
1506 
1507   /// Returns the scalar type of the induction.
1508   Type *getScalarType() const {
1509     return Trunc ? Trunc->getType() : IV->getType();
1510   }
1511 };
1512 
1513 class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
1514   const InductionDescriptor &IndDesc;
1515 
1516   bool IsScalarAfterVectorization;
1517 
1518 public:
1519   /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
1520   /// Start and step value \p Step.
1521   VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
1522                                 const InductionDescriptor &IndDesc,
1523                                 bool IsScalarAfterVectorization)
1524       : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
1525         IndDesc(IndDesc),
1526         IsScalarAfterVectorization(IsScalarAfterVectorization) {
1527     addOperand(Start);
1528     addOperand(Step);
1529   }
1530 
1531   ~VPWidenPointerInductionRecipe() override = default;
1532 
1533   VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
1534 
1535   /// Generate vector values for the pointer induction.
1536   void execute(VPTransformState &State) override;
1537 
1538   /// Returns true if only scalar values will be generated.
1539   bool onlyScalarsGenerated(ElementCount VF);
1540 
1541   /// Returns the induction descriptor for the recipe.
1542   const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1543 
1544 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1545   /// Print the recipe.
1546   void print(raw_ostream &O, const Twine &Indent,
1547              VPSlotTracker &SlotTracker) const override;
1548 #endif
1549 };
1550 
1551 /// A recipe for handling header phis that are widened in the vector loop.
1552 /// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
1553 /// managed in the recipe directly.
1554 class VPWidenPHIRecipe : public VPHeaderPHIRecipe {
1555   /// List of incoming blocks. Only used in the VPlan native path.
1556   SmallVector<VPBasicBlock *, 2> IncomingBlocks;
1557 
1558 public:
1559   /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
1560   VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
1561       : VPHeaderPHIRecipe(VPDef::VPWidenPHISC, Phi) {
1562     if (Start)
1563       addOperand(Start);
1564   }
1565 
1566   ~VPWidenPHIRecipe() override = default;
1567 
1568   VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
1569 
1570   /// Generate the phi/select nodes.
1571   void execute(VPTransformState &State) override;
1572 
1573 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1574   /// Print the recipe.
1575   void print(raw_ostream &O, const Twine &Indent,
1576              VPSlotTracker &SlotTracker) const override;
1577 #endif
1578 
1579   /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
1580   void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
1581     addOperand(IncomingV);
1582     IncomingBlocks.push_back(IncomingBlock);
1583   }
1584 
1585   /// Returns the \p I th incoming VPBasicBlock.
1586   VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
1587 
1588   /// Returns the \p I th incoming VPValue.
1589   VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
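
  // For illustration only (values and blocks are hypothetical): operand I
  // always pairs with IncomingBlocks[I], e.g. after
  //   Phi->addIncoming(V0, VPBB0);
  //   Phi->addIncoming(V1, VPBB1);
  // getIncomingValue(1) returns V1 and getIncomingBlock(1) returns VPBB1.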
1590 };
1591 
1592 /// A recipe for handling first-order recurrence phis. The start value is the
1593 /// first operand of the recipe and the incoming value from the backedge is the
1594 /// second operand.
1595 struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
1596   VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
1597       : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
1598 
1599   VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
1600 
1601   static inline bool classof(const VPHeaderPHIRecipe *R) {
1602     return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
1603   }
1604 
1605   void execute(VPTransformState &State) override;
1606 
1607 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1608   /// Print the recipe.
1609   void print(raw_ostream &O, const Twine &Indent,
1610              VPSlotTracker &SlotTracker) const override;
1611 #endif
1612 };
1613 
1614 /// A recipe for handling reduction phis. The start value is the first operand
1615 /// of the recipe and the incoming value from the backedge is the second
1616 /// operand.
1617 class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
1618   /// Descriptor for the reduction.
1619   const RecurrenceDescriptor &RdxDesc;
1620 
1621   /// The phi is part of an in-loop reduction.
1622   bool IsInLoop;
1623 
1624   /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
1625   bool IsOrdered;
1626 
1627 public:
1628   /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
1629   /// RdxDesc.
1630   VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
1631                        VPValue &Start, bool IsInLoop = false,
1632                        bool IsOrdered = false)
1633       : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
1634         RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
1635     assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
1636   }
1637 
1638   ~VPReductionPHIRecipe() override = default;
1639 
1640   VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
1641 
1642   static inline bool classof(const VPHeaderPHIRecipe *R) {
1643     return R->getVPDefID() == VPDef::VPReductionPHISC;
1644   }
1645 
1646   /// Generate the phi/select nodes.
1647   void execute(VPTransformState &State) override;
1648 
1649 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1650   /// Print the recipe.
1651   void print(raw_ostream &O, const Twine &Indent,
1652              VPSlotTracker &SlotTracker) const override;
1653 #endif
1654 
1655   const RecurrenceDescriptor &getRecurrenceDescriptor() const {
1656     return RdxDesc;
1657   }
1658 
1659   /// Returns true, if the phi is part of an ordered reduction.
1660   bool isOrdered() const { return IsOrdered; }
1661 
1662   /// Returns true, if the phi is part of an in-loop reduction.
1663   bool isInLoop() const { return IsInLoop; }
1664 };
1665 
1666 /// A recipe for vectorizing a phi-node as a sequence of mask-based select
1667 /// instructions.
1668 class VPBlendRecipe : public VPRecipeBase, public VPValue {
1669 public:
1670   /// The blend operation is a User of the incoming values and of their
1671   /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
1672   /// might be incoming with a full mask for which there is no VPValue.
1673   VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
1674       : VPRecipeBase(VPDef::VPBlendSC, Operands, Phi->getDebugLoc()),
1675         VPValue(this, Phi) {
1676     assert(Operands.size() > 0 &&
1677            ((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
1678            "Expected either a single incoming value or a positive even number "
1679            "of operands");
1680   }
1681 
1682   VP_CLASSOF_IMPL(VPDef::VPBlendSC)
1683 
1684   /// Return the number of incoming values, taking into account that a single
1685   /// incoming value has no mask.
1686   unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }
1687 
1688   /// Return incoming value number \p Idx.
1689   VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); }
1690 
1691   /// Return mask number \p Idx.
1692   VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); }
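
  // Worked example of the [I0, M0, I1, M1, ...] layout (hypothetical values):
  // for operands {A, MA, B, MB}, getNumIncomingValues() == (4 + 1) / 2 == 2,
  // getIncomingValue(1) == getOperand(2) == B and getMask(1) == getOperand(3)
  // == MB. For a single incoming value {A} there is no mask operand at all.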
1693 
1694   /// Generate the phi/select nodes.
1695   void execute(VPTransformState &State) override;
1696 
1697 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1698   /// Print the recipe.
1699   void print(raw_ostream &O, const Twine &Indent,
1700              VPSlotTracker &SlotTracker) const override;
1701 #endif
1702 
1703   /// Returns true if the recipe only uses the first lane of operand \p Op.
1704   bool onlyFirstLaneUsed(const VPValue *Op) const override {
1705     assert(is_contained(operands(), Op) &&
1706            "Op must be an operand of the recipe");
1707     // Recurses through Blend recipes only; recursion must terminate at header
1708     // phis at the latest.
1709     return all_of(users(),
1710                   [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
1711   }
1712 };
1713 
1714 /// VPInterleaveRecipe is a recipe for transforming an interleave group of loads
1715 /// or stores into one wide load/store and shuffles. The first operand of a
1716 /// VPInterleave recipe is the address, followed by the stored values, followed
1717 /// by an optional mask.
1718 class VPInterleaveRecipe : public VPRecipeBase {
1719   const InterleaveGroup<Instruction> *IG;
1720 
1721   /// Indicates if the interleave group is in a conditional block and requires a
1722   /// mask.
1723   bool HasMask = false;
1724 
1725   /// Indicates if gaps between members of the group need to be masked out or if
1726   /// unused gaps can be loaded speculatively.
1727   bool NeedsMaskForGaps = false;
1728 
1729 public:
1730   VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
1731                      ArrayRef<VPValue *> StoredValues, VPValue *Mask,
1732                      bool NeedsMaskForGaps)
1733       : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
1734         NeedsMaskForGaps(NeedsMaskForGaps) {
1735     for (unsigned i = 0; i < IG->getFactor(); ++i)
1736       if (Instruction *I = IG->getMember(i)) {
1737         if (I->getType()->isVoidTy())
1738           continue;
1739         new VPValue(I, this);
1740       }
1741 
1742     for (auto *SV : StoredValues)
1743       addOperand(SV);
1744     if (Mask) {
1745       HasMask = true;
1746       addOperand(Mask);
1747     }
1748   }
1749   ~VPInterleaveRecipe() override = default;
1750 
1751   VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
1752 
1753   /// Return the address accessed by this recipe.
1754   VPValue *getAddr() const {
1755     return getOperand(0); // Address is the 1st, mandatory operand.
1756   }
1757 
1758   /// Return the mask used by this recipe. Note that a full mask is represented
1759   /// by a nullptr.
1760   VPValue *getMask() const {
1761     // Mask is optional and therefore the last operand, if present.
1762     return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
1763   }
1764 
1765   /// Return the VPValues stored by this interleave group. If it is a load
1766   /// interleave group, return an empty ArrayRef.
1767   ArrayRef<VPValue *> getStoredValues() const {
1768     // The first operand is the address, followed by the stored values, followed
1769     // by an optional mask.
1770     return ArrayRef<VPValue *>(op_begin(), getNumOperands())
1771         .slice(1, getNumStoreOperands());
1772   }
1773 
1774   /// Generate the wide load or store, and shuffles.
1775   void execute(VPTransformState &State) override;
1776 
1777 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1778   /// Print the recipe.
1779   void print(raw_ostream &O, const Twine &Indent,
1780              VPSlotTracker &SlotTracker) const override;
1781 #endif
1782 
1783   const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
1784 
1785   /// Returns the number of stored operands of this interleave group. Returns 0
1786   /// for load interleave groups.
1787   unsigned getNumStoreOperands() const {
1788     return getNumOperands() - (HasMask ? 2 : 1);
1789   }
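
  // Operand-count arithmetic by example (hypothetical operands): a masked
  // store group {Addr, S0, S1, Mask} has getNumStoreOperands() == 4 - 2 == 2,
  // while an unmasked load group {Addr} has getNumStoreOperands() == 1 - 1 ==
  // 0, matching getStoredValues() being empty for loads.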
1790 
1791   /// The recipe only uses the first lane of the address.
1792   bool onlyFirstLaneUsed(const VPValue *Op) const override {
1793     assert(is_contained(operands(), Op) &&
1794            "Op must be an operand of the recipe");
1795     return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
1796   }
1797 };
1798 
1799 /// A recipe to represent in-loop reduction operations, performing a reduction on
1800 /// a vector operand into a scalar value, and adding the result to a chain.
1801 /// The Operands are {ChainOp, VecOp, [Condition]}.
1802 class VPReductionRecipe : public VPRecipeBase, public VPValue {
1803   /// The recurrence decriptor for the reduction in question.
1804   const RecurrenceDescriptor &RdxDesc;
1805 
1806 public:
1807   VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
1808                     VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp)
1809       : VPRecipeBase(VPDef::VPReductionSC, {ChainOp, VecOp}), VPValue(this, I),
1810         RdxDesc(R) {
1811     if (CondOp)
1812       addOperand(CondOp);
1813   }
1814 
1815   ~VPReductionRecipe() override = default;
1816 
1817   VP_CLASSOF_IMPL(VPDef::VPReductionSC)
1818 
1819   /// Generate the reduction in the loop.
1820   void execute(VPTransformState &State) override;
1821 
1822 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1823   /// Print the recipe.
1824   void print(raw_ostream &O, const Twine &Indent,
1825              VPSlotTracker &SlotTracker) const override;
1826 #endif
1827 
1828   /// The VPValue of the scalar Chain being accumulated.
1829   VPValue *getChainOp() const { return getOperand(0); }
1830   /// The VPValue of the vector value to be reduced.
1831   VPValue *getVecOp() const { return getOperand(1); }
1832   /// The VPValue of the condition for the block.
1833   VPValue *getCondOp() const {
1834     return getNumOperands() > 2 ? getOperand(2) : nullptr;
1835   }
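
  // Sketch of the {ChainOp, VecOp, [Condition]} layout (hypothetical values):
  // an unconditional reduction {Chain, Vec} has getCondOp() == nullptr, while
  // a conditional one {Chain, Vec, Cond} has getCondOp() == Cond.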
1836 };
1837 
1838 /// VPReplicateRecipe replicates a given instruction producing multiple scalar
1839 /// copies of the original scalar type, one per lane, instead of producing a
1840 /// single copy of widened type for all lanes. If the instruction is known to
1841 /// be uniform, only one copy, for lane zero, is generated.
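///
/// For illustration only, a hedged sketch of creating a predicated replicate
/// recipe (the instruction, operand range and mask are hypothetical):
/// \code
///   auto *RepR = new VPReplicateRecipe(Instr, Ops, /*IsUniform=*/false,
///                                      /*Mask=*/BlockInMask);
///   // The mask becomes the trailing operand:
///   assert(RepR->isPredicated() && RepR->getMask() == BlockInMask);
/// \endcode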
1842 class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPValue {
1843   /// Indicator if only a single replica (for lane zero) is needed.
1844   bool IsUniform;
1845 
1846   /// Indicator if the replicas are also predicated.
1847   bool IsPredicated;
1848 
1849 public:
1850   template <typename IterT>
1851   VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
1852                     bool IsUniform, VPValue *Mask = nullptr)
1853       : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
1854         VPValue(this, I), IsUniform(IsUniform), IsPredicated(Mask) {
1855     if (Mask)
1856       addOperand(Mask);
1857   }
1858 
1859   ~VPReplicateRecipe() override = default;
1860 
1861   VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
1862 
1863   /// Generate replicas of the desired Ingredient. Replicas will be generated
1864   /// for all parts and lanes unless a specific part and lane are specified in
1865   /// the \p State.
1866   void execute(VPTransformState &State) override;
1867 
1868 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1869   /// Print the recipe.
1870   void print(raw_ostream &O, const Twine &Indent,
1871              VPSlotTracker &SlotTracker) const override;
1872 #endif
1873 
1874   bool isUniform() const { return IsUniform; }
1875 
1876   bool isPredicated() const { return IsPredicated; }
1877 
1878   /// Returns true if the recipe only uses the first lane of operand \p Op.
1879   bool onlyFirstLaneUsed(const VPValue *Op) const override {
1880     assert(is_contained(operands(), Op) &&
1881            "Op must be an operand of the recipe");
1882     return isUniform();
1883   }
1884 
1885   /// Returns true if the recipe uses scalars of operand \p Op.
1886   bool usesScalars(const VPValue *Op) const override {
1887     assert(is_contained(operands(), Op) &&
1888            "Op must be an operand of the recipe");
1889     return true;
1890   }
1891 
1892   /// Returns true if the recipe is used by a widened recipe via an intervening
1893   /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
1894   /// in a vector.
1895   bool shouldPack() const;
1896 
1897   /// Return the mask of a predicated VPReplicateRecipe.
1898   VPValue *getMask() {
1899     assert(isPredicated() && "Trying to get the mask of an unpredicated recipe");
1900     return getOperand(getNumOperands() - 1);
1901   }
1902 };
1903 
1904 /// A recipe for generating conditional branches on the bits of a mask.
1905 class VPBranchOnMaskRecipe : public VPRecipeBase {
1906 public:
1907   VPBranchOnMaskRecipe(VPValue *BlockInMask)
1908       : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
1909     if (BlockInMask) // nullptr means all-one mask.
1910       addOperand(BlockInMask);
1911   }
1912 
1913   VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
1914 
1915   /// Generate the extraction of the appropriate bit from the block mask and the
1916   /// conditional branch.
1917   void execute(VPTransformState &State) override;
1918 
1919 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1920   /// Print the recipe.
1921   void print(raw_ostream &O, const Twine &Indent,
1922              VPSlotTracker &SlotTracker) const override {
1923     O << Indent << "BRANCH-ON-MASK ";
1924     if (VPValue *Mask = getMask())
1925       Mask->printAsOperand(O, SlotTracker);
1926     else
1927       O << " All-One";
1928   }
1929 #endif
1930 
1931   /// Return the mask used by this recipe. Note that a full mask is represented
1932   /// by a nullptr.
1933   VPValue *getMask() const {
1934     assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
1935     // Mask is optional.
1936     return getNumOperands() == 1 ? getOperand(0) : nullptr;
1937   }
1938 
1939   /// Returns true if the recipe uses scalars of operand \p Op.
1940   bool usesScalars(const VPValue *Op) const override {
1941     assert(is_contained(operands(), Op) &&
1942            "Op must be an operand of the recipe");
1943     return true;
1944   }
1945 };
1946 
1947 /// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
1948 /// control converges back from a Branch-on-Mask. The phi nodes are needed in
1949 /// order to merge values that are set under such a branch and feed their uses.
1950 /// The phi nodes can be scalar or vector depending on the users of the value.
1951 /// This recipe works in concert with VPBranchOnMaskRecipe.
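///
/// Rough shape of the pattern the two recipes model together (illustrative
/// sketch only, not actual printed VPlan output):
/// \code
///   BranchOnMask(M)            // guards the predicated block
///     %v = predicated recipe   // executes only where M is set
///   %phi = PredInstPHI(%v)     // merges %v back for unpredicated users
/// \endcode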
1952 class VPPredInstPHIRecipe : public VPRecipeBase, public VPValue {
1953 public:
1954   /// Construct a VPPredInstPHIRecipe given \p PredV, whose value needs a phi
1955   /// node after merging back from a Branch-on-Mask.
1956   VPPredInstPHIRecipe(VPValue *PredV)
1957       : VPRecipeBase(VPDef::VPPredInstPHISC, PredV), VPValue(this) {}
1958   ~VPPredInstPHIRecipe() override = default;
1959 
1960   VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
1961 
1962   /// Generates phi nodes for live-outs as needed to retain SSA form.
1963   void execute(VPTransformState &State) override;
1964 
1965 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1966   /// Print the recipe.
1967   void print(raw_ostream &O, const Twine &Indent,
1968              VPSlotTracker &SlotTracker) const override;
1969 #endif
1970 
1971   /// Returns true if the recipe uses scalars of operand \p Op.
1972   bool usesScalars(const VPValue *Op) const override {
1973     assert(is_contained(operands(), Op) &&
1974            "Op must be an operand of the recipe");
1975     return true;
1976   }
1977 };
1978 
1979 /// A Recipe for widening load/store operations.
1980 /// The recipe uses the following VPValues:
1981 /// - For load: Address, optional mask
1982 /// - For store: Address, stored value, optional mask
1983 /// TODO: We currently execute only per-part unless a specific instance is
1984 /// provided.
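///
/// Illustrative operand layouts (values are hypothetical): a masked load has
/// operands {Addr, Mask} and an unmasked store has {Addr, StoredValue}; in
/// all cases getMask() returns the trailing mask operand, or nullptr when the
/// access is unmasked.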
1985 class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
1986   Instruction &Ingredient;
1987 
1988   // Whether the loaded-from / stored-to addresses are consecutive.
1989   bool Consecutive;
1990 
1991   // Whether the consecutive loaded/stored addresses are in reverse order.
1992   bool Reverse;
1993 
1994   void setMask(VPValue *Mask) {
1995     if (!Mask)
1996       return;
1997     addOperand(Mask);
1998   }
1999 
2000   bool isMasked() const {
2001     return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
2002   }
2003 
2004 public:
2005   VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
2006                                  bool Consecutive, bool Reverse)
2007       : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}),
2008         Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) {
2009     assert((Consecutive || !Reverse) && "Reverse implies consecutive");
2010     new VPValue(this, &Load);
2011     setMask(Mask);
2012   }
2013 
2014   VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
2015                                  VPValue *StoredValue, VPValue *Mask,
2016                                  bool Consecutive, bool Reverse)
2017       : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue}),
2018         Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
2019     assert((Consecutive || !Reverse) && "Reverse implies consecutive");
2020     setMask(Mask);
2021   }
2022 
2023   VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC)
2024 
2025   /// Return the address accessed by this recipe.
2026   VPValue *getAddr() const {
2027     return getOperand(0); // Address is the 1st, mandatory operand.
2028   }
2029 
2030   /// Return the mask used by this recipe. Note that a full mask is represented
2031   /// by a nullptr.
2032   VPValue *getMask() const {
2033     // Mask is optional and therefore the last operand.
2034     return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
2035   }
2036 
2037   /// Returns true if this recipe is a store.
2038   bool isStore() const { return isa<StoreInst>(Ingredient); }
2039 
2040   /// Return the value stored by this recipe.
2041   VPValue *getStoredValue() const {
2042     assert(isStore() && "Stored value only available for store instructions");
2043     return getOperand(1); // Stored value is the 2nd, mandatory operand.
2044   }
2045 
2046   // Return whether the loaded-from / stored-to addresses are consecutive.
2047   bool isConsecutive() const { return Consecutive; }
2048 
2049   // Return whether the consecutive loaded/stored addresses are in reverse
2050   // order.
2051   bool isReverse() const { return Reverse; }
2052 
2053   /// Generate the wide load/store.
2054   void execute(VPTransformState &State) override;
2055 
2056 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2057   /// Print the recipe.
2058   void print(raw_ostream &O, const Twine &Indent,
2059              VPSlotTracker &SlotTracker) const override;
2060 #endif
2061 
2062   /// Returns true if the recipe only uses the first lane of operand \p Op.
2063   bool onlyFirstLaneUsed(const VPValue *Op) const override {
2064     assert(is_contained(operands(), Op) &&
2065            "Op must be an operand of the recipe");
2066 
2067     // Widened, consecutive memory operations only demand the first lane of
2068     // their address, unless the same operand is also stored. The latter can
2069     // happen with opaque pointers.
2070     return Op == getAddr() && isConsecutive() &&
2071            (!isStore() || Op != getStoredValue());
2072   }
2073 
2074   Instruction &getIngredient() const { return Ingredient; }
2075 };
2076 
2077 /// Recipe to expand a SCEV expression.
2078 class VPExpandSCEVRecipe : public VPRecipeBase, public VPValue {
2079   const SCEV *Expr;
2080   ScalarEvolution &SE;
2081 
2082 public:
2083   VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
2084       : VPRecipeBase(VPDef::VPExpandSCEVSC, {}), VPValue(this), Expr(Expr),
2085         SE(SE) {}
2086 
2087   ~VPExpandSCEVRecipe() override = default;
2088 
2089   VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
2090 
2091   /// Expand the SCEV expression.
2092   void execute(VPTransformState &State) override;
2093 
2094 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2095   /// Print the recipe.
2096   void print(raw_ostream &O, const Twine &Indent,
2097              VPSlotTracker &SlotTracker) const override;
2098 #endif
2099 
2100   const SCEV *getSCEV() const { return Expr; }
2101 };
2102 
2103 /// Canonical scalar induction phi of the vector loop, starting at the specified
2104 /// start value (either 0 or the resume value when vectorizing the epilogue
2105 /// loop). VPWidenCanonicalIVRecipe represents the vector version of the
2106 /// canonical induction variable.
2107 class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
2108 public:
2109   VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
2110       : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
2111 
2112   ~VPCanonicalIVPHIRecipe() override = default;
2113 
2114   VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
2115 
2116   static inline bool classof(const VPHeaderPHIRecipe *D) {
2117     return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
2118   }
2119 
2120   /// Generate the canonical scalar induction phi of the vector loop.
2121   void execute(VPTransformState &State) override;
2122 
2123 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2124   /// Print the recipe.
2125   void print(raw_ostream &O, const Twine &Indent,
2126              VPSlotTracker &SlotTracker) const override;
2127 #endif
2128 
2129   /// Returns the scalar type of the induction.
2130   Type *getScalarType() const {
2131     return getStartValue()->getLiveInIRValue()->getType();
2132   }
2133 
2134   /// Returns true if the recipe only uses the first lane of operand \p Op.
2135   bool onlyFirstLaneUsed(const VPValue *Op) const override {
2136     assert(is_contained(operands(), Op) &&
2137            "Op must be an operand of the recipe");
2138     return true;
2139   }
2140 
2141   /// Returns true if the recipe only uses the first part of operand \p Op.
2142   bool onlyFirstPartUsed(const VPValue *Op) const override {
2143     assert(is_contained(operands(), Op) &&
2144            "Op must be an operand of the recipe");
2145     return true;
2146   }
2147 
2148   /// Check if the induction described by \p Kind, \p Start and \p Step is
2149   /// canonical, i.e. has the same start, step (of 1), and type as the
2150   /// canonical IV.
2151   bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start,
2152                    VPValue *Step, Type *Ty) const;
2153 };
2154 
2155 /// A recipe for generating the active lane mask for the vector loop that is
2156 /// used to predicate the vector operations.
2157 /// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2158 /// remove VPActiveLaneMaskPHIRecipe.
2159 class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
2160 public:
2161   VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
2162       : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
2163                           DL) {}
2164 
2165   ~VPActiveLaneMaskPHIRecipe() override = default;
2166 
2167   VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
2168 
2169   static inline bool classof(const VPHeaderPHIRecipe *D) {
2170     return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
2171   }
2172 
2173   /// Generate the active lane mask phi of the vector loop.
2174   void execute(VPTransformState &State) override;
2175 
2176 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2177   /// Print the recipe.
2178   void print(raw_ostream &O, const Twine &Indent,
2179              VPSlotTracker &SlotTracker) const override;
2180 #endif
2181 };
2182 
2183 /// A Recipe for widening the canonical induction variable of the vector loop.
2184 class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
2185 public:
2186   VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
2187       : VPRecipeBase(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}),
2188         VPValue(this) {}
2189 
2190   ~VPWidenCanonicalIVRecipe() override = default;
2191 
2192   VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
2193 
2194   /// Generate a canonical vector induction variable of the vector loop, with
2195   /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
2196   /// step = <VF*UF, VF*UF, ..., VF*UF>.
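  ///
  /// For example (illustrative numbers only), with VF = 4 and UF = 2 this
  /// yields part 0 = <0, 1, 2, 3> and part 1 = <4, 5, 6, 7>, each advanced by
  /// <8, 8, 8, 8> per vector iteration.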
2197   void execute(VPTransformState &State) override;
2198 
2199 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2200   /// Print the recipe.
2201   void print(raw_ostream &O, const Twine &Indent,
2202              VPSlotTracker &SlotTracker) const override;
2203 #endif
2204 
2205   /// Returns the scalar type of the induction.
2206   const Type *getScalarType() const {
2207     return cast<VPCanonicalIVPHIRecipe>(getOperand(0)->getDefiningRecipe())
2208         ->getScalarType();
2209   }
2210 };
2211 
2212 /// A recipe for converting the canonical IV value to the corresponding value of
2213 /// an IV with different start and step values, using Start + CanonicalIV *
2214 /// Step.
2215 class VPDerivedIVRecipe : public VPRecipeBase, public VPValue {
2216   /// If not nullptr, the result of the induction will get truncated to
2217   /// TruncResultTy.
2218   Type *TruncResultTy;
2219 
2220   /// Kind of the induction.
2221   const InductionDescriptor::InductionKind Kind;
2222   /// If not nullptr, the floating point induction binary operator. Must be set
2223   /// for floating point inductions.
2224   const FPMathOperator *FPBinOp;
2225 
2226 public:
2227   VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
2228                     VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
2229                     Type *TruncResultTy)
2230       : VPRecipeBase(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
2231         VPValue(this), TruncResultTy(TruncResultTy), Kind(IndDesc.getKind()),
2232         FPBinOp(dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())) {
2233   }
2234 
2235   ~VPDerivedIVRecipe() override = default;
2236 
2237   VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
2238 
2239   /// Generate the transformed value of the induction at offset StartValue (1st
2240   /// operand) + IV (2nd operand) * StepValue (3rd operand).
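  ///
  /// For example (illustrative numbers only), for a canonical IV taking the
  /// values 0, 1, 2, ... with StartValue == 10 and StepValue == 3, the
  /// derived IV takes the values 10, 13, 16, ...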
2241   void execute(VPTransformState &State) override;
2242 
2243 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2244   /// Print the recipe.
2245   void print(raw_ostream &O, const Twine &Indent,
2246              VPSlotTracker &SlotTracker) const override;
2247 #endif
2248 
2249   Type *getScalarType() const {
2250     return TruncResultTy ? TruncResultTy
2251                          : getStartValue()->getLiveInIRValue()->getType();
2252   }
2253 
2254   VPValue *getStartValue() const { return getOperand(0); }
2255   VPValue *getCanonicalIV() const { return getOperand(1); }
2256   VPValue *getStepValue() const { return getOperand(2); }
2257 
2258   /// Returns true if the recipe only uses the first lane of operand \p Op.
2259   bool onlyFirstLaneUsed(const VPValue *Op) const override {
2260     assert(is_contained(operands(), Op) &&
2261            "Op must be an operand of the recipe");
2262     return true;
2263   }
2264 };
2265 
2266 /// A recipe for handling phi nodes of integer and floating-point inductions,
2267 /// producing their scalar values.
2268 class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags, public VPValue {
2269   Instruction::BinaryOps InductionOpcode;
2270 
2271 public:
2272   VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step,
2273                         Instruction::BinaryOps Opcode, FastMathFlags FMFs)
2274       : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
2275                             ArrayRef<VPValue *>({IV, Step}), FMFs),
2276         VPValue(this), InductionOpcode(Opcode) {}
2277 
2278   VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
2279                         VPValue *Step)
2280       : VPScalarIVStepsRecipe(
2281             IV, Step, IndDesc.getInductionOpcode(),
2282             dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
2283                 ? IndDesc.getInductionBinOp()->getFastMathFlags()
2284                 : FastMathFlags()) {}
2285 
2286   ~VPScalarIVStepsRecipe() override = default;
2287 
2288   VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
2289 
2290   /// Generate the scalarized versions of the phi node as needed by their users.
2291   void execute(VPTransformState &State) override;
2292 
2293 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2294   /// Print the recipe.
2295   void print(raw_ostream &O, const Twine &Indent,
2296              VPSlotTracker &SlotTracker) const override;
2297 #endif
2298 
2299   VPValue *getStepValue() const { return getOperand(1); }
2300 
2301   /// Returns true if the recipe only uses the first lane of operand \p Op.
2302   bool onlyFirstLaneUsed(const VPValue *Op) const override {
2303     assert(is_contained(operands(), Op) &&
2304            "Op must be an operand of the recipe");
2305     return true;
2306   }
2307 };
2308 
2309 /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
2310 /// holds a sequence of zero or more VPRecipes, each representing a sequence of
2311 /// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
2312 class VPBasicBlock : public VPBlockBase {
2313 public:
2314   using RecipeListTy = iplist<VPRecipeBase>;
2315 
2316 private:
2317   /// The VPRecipes held in the order of output instructions to generate.
2318   RecipeListTy Recipes;
2319 
2320 public:
2321   VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
2322       : VPBlockBase(VPBasicBlockSC, Name.str()) {
2323     if (Recipe)
2324       appendRecipe(Recipe);
2325   }
2326 
2327   ~VPBasicBlock() override {
2328     while (!Recipes.empty())
2329       Recipes.pop_back();
2330   }
2331 
2332   /// Recipe iterators...
2333   using iterator = RecipeListTy::iterator;
2334   using const_iterator = RecipeListTy::const_iterator;
2335   using reverse_iterator = RecipeListTy::reverse_iterator;
2336   using const_reverse_iterator = RecipeListTy::const_reverse_iterator;
2337 
2338   //===--------------------------------------------------------------------===//
2339   /// Recipe iterator methods
2340   ///
2341   inline iterator begin() { return Recipes.begin(); }
2342   inline const_iterator begin() const { return Recipes.begin(); }
2343   inline iterator end() { return Recipes.end(); }
2344   inline const_iterator end() const { return Recipes.end(); }
2345 
2346   inline reverse_iterator rbegin() { return Recipes.rbegin(); }
2347   inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
2348   inline reverse_iterator rend() { return Recipes.rend(); }
2349   inline const_reverse_iterator rend() const { return Recipes.rend(); }
2350 
2351   inline size_t size() const { return Recipes.size(); }
2352   inline bool empty() const { return Recipes.empty(); }
2353   inline const VPRecipeBase &front() const { return Recipes.front(); }
2354   inline VPRecipeBase &front() { return Recipes.front(); }
2355   inline const VPRecipeBase &back() const { return Recipes.back(); }
2356   inline VPRecipeBase &back() { return Recipes.back(); }
2357 
2358   /// Returns a reference to the list of recipes.
2359   RecipeListTy &getRecipeList() { return Recipes; }
2360 
2361   /// Returns a pointer to a member of the recipe list.
2362   static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
2363     return &VPBasicBlock::Recipes;
2364   }
2365 
2366   /// Method to support type inquiry through isa, cast, and dyn_cast.
2367   static inline bool classof(const VPBlockBase *V) {
2368     return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
2369   }
2370 
2371   void insert(VPRecipeBase *Recipe, iterator InsertPt) {
2372     assert(Recipe && "No recipe to append.");
2373     assert(!Recipe->Parent && "Recipe already in VPlan");
2374     Recipe->Parent = this;
2375     Recipes.insert(InsertPt, Recipe);
2376   }
2377 
2378   /// Augment the existing recipes of a VPBasicBlock with an additional
2379   /// \p Recipe as the last recipe.
2380   void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
2381 
2382   /// The method which generates the output IR instructions that correspond to
2383   /// this VPBasicBlock, thereby "executing" the VPlan.
2384   void execute(VPTransformState *State) override;
2385 
2386   /// Return the position of the first non-phi node recipe in the block.
2387   iterator getFirstNonPhi();
2388 
2389   /// Returns an iterator range over the PHI-like recipes in the block.
2390   iterator_range<iterator> phis() {
2391     return make_range(begin(), getFirstNonPhi());
2392   }
2393 
2394   void dropAllReferences(VPValue *NewValue) override;
2395 
2396   /// Split current block at \p SplitAt by inserting a new block between the
2397   /// current block and its successors and moving all recipes starting at
2398   /// SplitAt to the new block. Returns the new block.
2399   VPBasicBlock *splitAt(iterator SplitAt);
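
  // Hedged usage sketch: splitting at the first non-phi recipe keeps the phi
  // recipes here and moves everything else, e.g.:
  //   VPBasicBlock *Tail = VPBB->splitAt(VPBB->getFirstNonPhi());
  // VPBB's former successors now succeed Tail instead.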
2400 
2401   VPRegionBlock *getEnclosingLoopRegion();
2402 
2403 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2404   /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
2405   /// SlotTracker is used to print unnamed VPValues using consecutive numbers.
2406   ///
2407   /// Note that the numbering is applied to the whole VPlan, so printing
2408   /// individual blocks is consistent with the whole VPlan printing.
2409   void print(raw_ostream &O, const Twine &Indent,
2410              VPSlotTracker &SlotTracker) const override;
2411   using VPBlockBase::print; // Get the print(raw_stream &O) version.
2412 #endif
2413 
2414   /// If the block has multiple successors, return the branch recipe terminating
2415   /// the block. If there are no successors or only a single one, return nullptr.
2416   VPRecipeBase *getTerminator();
2417   const VPRecipeBase *getTerminator() const;
2418 
2419   /// Returns true if the block is exiting its parent region.
2420   bool isExiting() const;
2421 
2422 private:
2423   /// Create an IR BasicBlock to hold the output instructions generated by this
2424   /// VPBasicBlock, and return it. Update the CFGState accordingly.
2425   BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
2426 };
2427 
2428 /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
2429 /// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
2430 /// A VPRegionBlock may indicate that its contents are to be replicated several
2431 /// times. This is designed to support predicated scalarization, in which a
2432 /// scalar if-then code structure needs to be generated VF * UF times. Having
2433 /// this replication indicator helps to keep a single model for multiple
2434 /// candidate VF's. The actual replication takes place only once the desired VF
2435 /// and UF have been determined.
2436 class VPRegionBlock : public VPBlockBase {
2437   /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
2438   VPBlockBase *Entry;
2439 
2440   /// Hold the Single Exiting block of the SESE region modelled by the
2441   /// VPRegionBlock.
2442   VPBlockBase *Exiting;
2443 
2444   /// An indicator whether this region is to generate multiple replicated
2445   /// instances of output IR corresponding to its VPBlockBases.
2446   bool IsReplicator;
2447 
2448 public:
2449   VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
2450                 const std::string &Name = "", bool IsReplicator = false)
2451       : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
2452         IsReplicator(IsReplicator) {
2453     assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
2454     assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
2455     Entry->setParent(this);
2456     Exiting->setParent(this);
2457   }
2458   VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
2459       : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
2460         IsReplicator(IsReplicator) {}
2461 
2462   ~VPRegionBlock() override {
2463     if (Entry) {
2464       VPValue DummyValue;
2465       Entry->dropAllReferences(&DummyValue);
2466       deleteCFG(Entry);
2467     }
2468   }
2469 
2470   /// Method to support type inquiry through isa, cast, and dyn_cast.
2471   static inline bool classof(const VPBlockBase *V) {
2472     return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
2473   }
2474 
2475   const VPBlockBase *getEntry() const { return Entry; }
2476   VPBlockBase *getEntry() { return Entry; }
2477 
2478   /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
2479   /// EntryBlock must have no predecessors.
2480   void setEntry(VPBlockBase *EntryBlock) {
2481     assert(EntryBlock->getPredecessors().empty() &&
2482            "Entry block cannot have predecessors.");
2483     Entry = EntryBlock;
2484     EntryBlock->setParent(this);
2485   }
2486 
2487   const VPBlockBase *getExiting() const { return Exiting; }
2488   VPBlockBase *getExiting() { return Exiting; }
2489 
2490   /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
2491   /// ExitingBlock must have no successors.
2492   void setExiting(VPBlockBase *ExitingBlock) {
2493     assert(ExitingBlock->getSuccessors().empty() &&
2494            "Exit block cannot have successors.");
2495     Exiting = ExitingBlock;
2496     ExitingBlock->setParent(this);
2497   }
2498 
2499   /// Returns the pre-header VPBasicBlock of the loop region.
2500   VPBasicBlock *getPreheaderVPBB() {
2501     assert(!isReplicator() && "should only get pre-header of loop regions");
2502     return getSinglePredecessor()->getExitingBasicBlock();
2503   }
2504 
2505   /// An indicator whether this region is to generate multiple replicated
2506   /// instances of output IR corresponding to its VPBlockBases.
2507   bool isReplicator() const { return IsReplicator; }
2508 
2509   /// The method which generates the output IR instructions that correspond to
2510   /// this VPRegionBlock, thereby "executing" the VPlan.
2511   void execute(VPTransformState *State) override;
2512 
2513   void dropAllReferences(VPValue *NewValue) override;
2514 
2515 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2516   /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
2517   /// \p Indent. \p SlotTracker is used to print unnamed VPValues using
2518   /// consecutive numbers.
2519   ///
2520   /// Note that the numbering is applied to the whole VPlan, so printing
2521   /// individual regions is consistent with the whole VPlan printing.
2522   void print(raw_ostream &O, const Twine &Indent,
2523              VPSlotTracker &SlotTracker) const override;
2524   using VPBlockBase::print; // Get the print(raw_stream &O) version.
2525 #endif
2526 };
2527 
2528 /// VPlan models a candidate for vectorization, encoding various decisions taken
2529 /// to produce efficient output IR, including which branches, basic-blocks and
2530 /// output IR instructions to generate, and their cost. VPlan holds a
2531 /// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
2532 /// VPBasicBlock.
2533 class VPlan {
2534   friend class VPlanPrinter;
2535   friend class VPSlotTracker;
2536 
2537   /// Hold the single entry to the Hierarchical CFG of the VPlan, i.e. the
2538   /// preheader of the vector loop.
2539   VPBasicBlock *Entry;
2540 
2541   /// VPBasicBlock corresponding to the original preheader. Used to place
2542   /// VPExpandSCEV recipes for expressions used during skeleton creation and the
2543   /// rest of VPlan execution.
2544   VPBasicBlock *Preheader;
2545 
2546   /// Holds the VFs applicable to this VPlan.
2547   SmallSetVector<ElementCount, 2> VFs;
2548 
2549   /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
2550   /// any UF.
2551   SmallSetVector<unsigned, 2> UFs;
2552 
2553   /// Holds the name of the VPlan, for printing.
2554   std::string Name;
2555 
2556   /// Represents the trip count of the original loop, for folding
2557   /// the tail.
2558   VPValue *TripCount = nullptr;
2559 
2560   /// Represents the backedge taken count of the original loop, for folding
2561   /// the tail. It equals TripCount - 1.
2562   VPValue *BackedgeTakenCount = nullptr;
2563 
2564   /// Represents the vector trip count.
2565   VPValue VectorTripCount;
2566 
2567   /// Represents the loop-invariant VF * UF of the vector loop region.
2568   VPValue VFxUF;
2569 
2570   /// Holds a mapping between Values and their corresponding VPValue inside
2571   /// VPlan.
2572   Value2VPValueTy Value2VPValue;
2573 
2574   /// Contains all the external definitions created for this VPlan. External
2575   /// definitions are VPValues that hold a pointer to their underlying IR.
2576   SmallVector<VPValue *, 16> VPLiveInsToFree;
2577 
2578   /// Indicates whether it is safe to use the Value2VPValue mapping or if the
2579   /// mapping cannot be used any longer, because it is stale.
2580   bool Value2VPValueEnabled = true;
2581 
2582   /// Values used outside the plan.
2583   MapVector<PHINode *, VPLiveOut *> LiveOuts;
2584 
2585   /// Mapping from SCEVs to the VPValues representing their expansions.
2586   /// NOTE: This mapping is temporary and will be removed once all users have
2587   /// been modeled in VPlan directly.
2588   DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
2589 
2590 public:
2591   /// Construct a VPlan with original preheader \p Preheader, trip count \p TC
2592   /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
2593   /// be disconnected, as the bypass blocks between them are not yet modeled in
2594   /// VPlan.
2595   VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
2596       : VPlan(Preheader, Entry) {
2597     TripCount = TC;
2598   }
2599 
2600   /// Construct a VPlan with original preheader \p Preheader and \p Entry to
2601   /// the plan. At the moment, \p Preheader and \p Entry need to be
2602   /// disconnected, as the bypass blocks between them are not yet modeled in
2603   /// VPlan.
2604   VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
2605       : Entry(Entry), Preheader(Preheader) {
2606     Entry->setPlan(this);
2607     Preheader->setPlan(this);
2608     assert(Preheader->getNumSuccessors() == 0 &&
2609            Preheader->getNumPredecessors() == 0 &&
2610            "preheader must be disconnected");
2611   }
2612 
2613   ~VPlan();
2614 
2615   /// Create initial VPlan skeleton, having an "entry" VPBasicBlock (wrapping
2616   /// original scalar pre-header) which contains SCEV expansions that need to
2617   /// happen before the CFG is modified; a VPBasicBlock for the vector
2618   /// pre-header, followed by a region for the vector loop, followed by the
2619   /// middle VPBasicBlock.
2620   static VPlanPtr createInitialVPlan(const SCEV *TripCount,
2621                                      ScalarEvolution &PSE);
2622 
2623   /// Prepare the plan for execution, setting up the required live-in values.
2624   void prepareToExecute(Value *TripCount, Value *VectorTripCount,
2625                         Value *CanonicalIVStartValue, VPTransformState &State);
2626 
2627   /// Generate the IR code for this VPlan.
2628   void execute(VPTransformState *State);
2629 
2630   VPBasicBlock *getEntry() { return Entry; }
2631   const VPBasicBlock *getEntry() const { return Entry; }
2632 
2633   /// The trip count of the original loop.
2634   VPValue *getTripCount() const {
2635     assert(TripCount && "trip count needs to be set before accessing it");
2636     return TripCount;
2637   }
2638 
2639   /// The backedge taken count of the original loop.
2640   VPValue *getOrCreateBackedgeTakenCount() {
2641     if (!BackedgeTakenCount)
2642       BackedgeTakenCount = new VPValue();
2643     return BackedgeTakenCount;
2644   }
2645 
2646   /// The vector trip count.
2647   VPValue &getVectorTripCount() { return VectorTripCount; }
2648 
2649   /// Returns VF * UF of the vector loop region.
2650   VPValue &getVFxUF() { return VFxUF; }
2651 
2652   /// Mark the plan to indicate that using Value2VPValue is not safe any
2653   /// longer, because it may be stale.
2654   void disableValue2VPValue() { Value2VPValueEnabled = false; }
2655 
2656   void addVF(ElementCount VF) { VFs.insert(VF); }
2657 
2658   void setVF(ElementCount VF) {
2659     assert(hasVF(VF) && "Cannot set VF not already in plan");
2660     VFs.clear();
2661     VFs.insert(VF);
2662   }
2663 
2664   bool hasVF(ElementCount VF) { return VFs.count(VF); }
2665 
2666   bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
2667 
2668   bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
2669 
2670   void setUF(unsigned UF) {
2671     assert(hasUF(UF) && "Cannot set the UF not already in plan");
2672     UFs.clear();
2673     UFs.insert(UF);
2674   }
2675 
2676   /// Return a string with the name of the plan and the applicable VFs and UFs.
2677   std::string getName() const;
2678 
2679   void setName(const Twine &newName) { Name = newName.str(); }
2680 
2681   void addVPValue(Value *V, VPValue *VPV) {
2682     assert((Value2VPValueEnabled || VPV->isLiveIn()) &&
2683            "Value2VPValue mapping may be out of date!");
2684     assert(V && "Trying to add a null Value to VPlan");
2685     assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
2686     Value2VPValue[V] = VPV;
2687   }
2688 
2689   /// Returns the VPValue for \p V. \p OverrideAllowed can be used to disable
2690   /// checking whether it is safe to query VPValues using IR Values.
2691   VPValue *getVPValue(Value *V, bool OverrideAllowed = false) {
2692     assert(V && "Trying to get the VPValue of a null Value");
2693     assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
2694     assert((Value2VPValueEnabled || OverrideAllowed ||
2695             Value2VPValue[V]->isLiveIn()) &&
2696            "Value2VPValue mapping may be out of date!");
2697     return Value2VPValue[V];
2698   }
2699 
2700   /// Gets the VPValue for \p V or adds a new live-in (if none exists yet) for
2701   /// \p V.
2702   VPValue *getVPValueOrAddLiveIn(Value *V) {
2703     assert(V && "Trying to get or add the VPValue of a null Value");
2704     if (!Value2VPValue.count(V)) {
2705       VPValue *VPV = new VPValue(V);
2706       VPLiveInsToFree.push_back(VPV);
2707       addVPValue(V, VPV);
2708     }
2709 
2710     return getVPValue(V);
2711   }
2712 
2713 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2714   /// Print the live-ins of this VPlan to \p O.
2715   void printLiveIns(raw_ostream &O) const;
2716 
2717   /// Print this VPlan to \p O.
2718   void print(raw_ostream &O) const;
2719 
2720   /// Print this VPlan in DOT format to \p O.
2721   void printDOT(raw_ostream &O) const;
2722 
2723   /// Dump the plan to stderr (for debugging).
2724   LLVM_DUMP_METHOD void dump() const;
2725 #endif
2726 
2727   /// Returns a range mapping the values in the range \p Operands to their
2728   /// corresponding VPValues.
2729   iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>>
2730   mapToVPValues(User::op_range Operands) {
2731     std::function<VPValue *(Value *)> Fn = [this](Value *Op) {
2732       return getVPValueOrAddLiveIn(Op);
2733     };
2734     return map_range(Operands, Fn);
2735   }
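
  // Hedged usage sketch (assuming I is some Instruction being widened): map
  // its IR operands to VPValues, creating live-ins where none exist yet:
  //   SmallVector<VPValue *, 4> Ops(mapToVPValues(I->operands()));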
2736 
2737   /// Returns the VPRegionBlock of the vector loop.
2738   VPRegionBlock *getVectorLoopRegion() {
2739     return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
2740   }
2741   const VPRegionBlock *getVectorLoopRegion() const {
2742     return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
2743   }
2744 
2745   /// Returns the canonical induction recipe of the vector loop.
2746   VPCanonicalIVPHIRecipe *getCanonicalIV() {
2747     VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
2748     if (EntryVPBB->empty()) {
2749       // VPlan native path.
2750       EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
2751     }
2752     return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
2753   }
2754 
2755   void addLiveOut(PHINode *PN, VPValue *V);
2756 
2757   void removeLiveOut(PHINode *PN) {
2758     delete LiveOuts[PN];
2759     LiveOuts.erase(PN);
2760   }
2761 
2762   const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
2763     return LiveOuts;
2764   }
2765 
2766   VPValue *getSCEVExpansion(const SCEV *S) const {
2767     return SCEVToExpansion.lookup(S);
2768   }
2769 
2770   void addSCEVExpansion(const SCEV *S, VPValue *V) {
2771     assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
2772     SCEVToExpansion[S] = V;
2773   }
2774 
2775   /// \return The block corresponding to the original preheader.
2776   VPBasicBlock *getPreheader() { return Preheader; }
2777   const VPBasicBlock *getPreheader() const { return Preheader; }
2778 
2779 private:
2780   /// Add to the given dominator tree the header block and every new basic block
2781   /// that was created between it and the latch block, inclusive.
2782   static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB,
2783                                   BasicBlock *LoopPreHeaderBB,
2784                                   BasicBlock *LoopExitBB);
2785 };
2786 
2787 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2788 /// VPlanPrinter prints a given VPlan to a given output stream. The printing is
2789 /// indented and follows the dot format.
2790 class VPlanPrinter {
2791   raw_ostream &OS;
2792   const VPlan &Plan;
2793   unsigned Depth = 0;
2794   unsigned TabWidth = 2;
2795   std::string Indent;
2796   unsigned BID = 0;
2797   SmallDenseMap<const VPBlockBase *, unsigned> BlockID;
2798 
2799   VPSlotTracker SlotTracker;
2800 
2801   /// Handle indentation.
2802   void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
2803 
2804   /// Print a given \p Block of the Plan.
2805   void dumpBlock(const VPBlockBase *Block);
2806 
2807   /// Print the information related to the CFG edges going out of a given
2808   /// \p Block, followed by printing the successor blocks themselves.
2809   void dumpEdges(const VPBlockBase *Block);
2810 
2811   /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
2812   /// its successor blocks.
2813   void dumpBasicBlock(const VPBasicBlock *BasicBlock);
2814 
2815   /// Print a given \p Region of the Plan.
2816   void dumpRegion(const VPRegionBlock *Region);
2817 
2818   unsigned getOrCreateBID(const VPBlockBase *Block) {
2819     return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
2820   }
2821 
2822   Twine getOrCreateName(const VPBlockBase *Block);
2823 
2824   Twine getUID(const VPBlockBase *Block);
2825 
2826   /// Print the information related to a CFG edge between two VPBlockBases.
2827   void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
2828                 const Twine &Label);
2829 
2830 public:
2831   VPlanPrinter(raw_ostream &O, const VPlan &P)
2832       : OS(O), Plan(P), SlotTracker(&P) {}
2833 
2834   LLVM_DUMP_METHOD void dump();
2835 };
2836 
2837 struct VPlanIngredient {
2838   const Value *V;
2839 
2840   VPlanIngredient(const Value *V) : V(V) {}
2841 
2842   void print(raw_ostream &O) const;
2843 };
2844 
2845 inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
2846   I.print(OS);
2847   return OS;
2848 }
2849 
2850 inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
2851   Plan.print(OS);
2852   return OS;
2853 }
2854 #endif
2855 
2856 //===----------------------------------------------------------------------===//
2857 // VPlan Utilities
2858 //===----------------------------------------------------------------------===//
2859 
2860 /// Class that provides utilities for VPBlockBases in VPlan.
2861 class VPBlockUtils {
2862 public:
2863   VPBlockUtils() = delete;
2864 
2865   /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
2866   /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
2867   /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
2868   /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
2869   /// have neither successors nor predecessors.
2870   static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
2871     assert(NewBlock->getSuccessors().empty() &&
2872            NewBlock->getPredecessors().empty() &&
2873            "Can't insert new block with predecessors or successors.");
2874     NewBlock->setParent(BlockPtr->getParent());
2875     SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
2876     for (VPBlockBase *Succ : Succs) {
2877       disconnectBlocks(BlockPtr, Succ);
2878       connectBlocks(NewBlock, Succ);
2879     }
2880     connectBlocks(BlockPtr, NewBlock);
2881   }
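  // Illustrative sketch (hypothetical blocks A and B): turning the edge
  // A -> B into A -> New -> B.
  //
  //   VPBasicBlock *New = new VPBasicBlock("new.block");
  //   VPBlockUtils::insertBlockAfter(New, A); // A's successors move to New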
2882 
2883   /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
2884   /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p
2885   /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
2886   /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
2887   /// and \p IfTrue and \p IfFalse must have neither successors nor
2888   /// predecessors.
2889   static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
2890                                    VPBlockBase *BlockPtr) {
2891     assert(IfTrue->getSuccessors().empty() &&
2892            "Can't insert IfTrue with successors.");
2893     assert(IfFalse->getSuccessors().empty() &&
2894            "Can't insert IfFalse with successors.");
2895     BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
2896     IfTrue->setPredecessors({BlockPtr});
2897     IfFalse->setPredecessors({BlockPtr});
2898     IfTrue->setParent(BlockPtr->getParent());
2899     IfFalse->setParent(BlockPtr->getParent());
2900   }
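  // Illustrative sketch (hypothetical blocks): forming the branching half of
  // an if/then/else diamond; the rejoin must still be wired up explicitly.
  //
  //   VPBlockUtils::insertTwoBlocksAfter(Then, Else, Cond);
  //   VPBlockUtils::connectBlocks(Then, Merge);
  //   VPBlockUtils::connectBlocks(Else, Merge);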
2901 
2902   /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
2903   /// the successors of \p From and \p From to the predecessors of \p To. Both
2904   /// VPBlockBases must have the same parent, which can be null. Both
2905   /// VPBlockBases can be already connected to other VPBlockBases.
2906   static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {
2907     assert((From->getParent() == To->getParent()) &&
2908            "Can't connect two block with different parents");
2909     assert(From->getNumSuccessors() < 2 &&
2910            "Blocks can't have more than two successors.");
2911     From->appendSuccessor(To);
2912     To->appendPredecessor(From);
2913   }
2914 
2915   /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
2916   /// from the successors of \p From and \p From from the predecessors of \p To.
2917   static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
2918     assert(To && "Successor to disconnect is null.");
2919     From->removeSuccessor(To);
2920     To->removePredecessor(From);
2921   }
2922 
2923   /// Return an iterator range over \p Range which only includes \p BlockTy
2924   /// blocks. The accesses are cast to \p BlockTy.
2925   template <typename BlockTy, typename T>
2926   static auto blocksOnly(const T &Range) {
2927     // Create BaseTy with correct const-ness based on BlockTy.
2928     using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
2929                                       const VPBlockBase, VPBlockBase>;
2930 
2931     // We first need to create an iterator range over (const) BlockTy & instead
2932     // of (const) BlockTy * for filter_range to work properly.
2933     auto Mapped =
2934         map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
2935     auto Filter = make_filter_range(
2936         Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
2937     return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
2938       return cast<BlockTy>(&Block);
2939     });
2940   }
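  // Illustrative sketch (hypothetical `Region`): visiting only the
  // VPBasicBlock successors of a block, with const-ness taken from the
  // requested element type.
  //
  //   for (const VPBasicBlock *VPBB :
  //        VPBlockUtils::blocksOnly<const VPBasicBlock>(
  //            Region->getEntry()->getSuccessors()))
  //     ...;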
2941 };
2942 
2943 class VPInterleavedAccessInfo {
2944   DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
2945       InterleaveGroupMap;
2946 
2947   /// Type for mapping of instruction based interleave groups to VPInstruction
2948   /// interleave groups.
2949   using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
2950                              InterleaveGroup<VPInstruction> *>;
2951 
2952   /// Recursively traverse \p Region and populate VPlan based interleave
2953   /// groups based on \p IAI.
2954   void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
2955                    InterleavedAccessInfo &IAI);
2956   /// Recursively traverse \p Block and populate VPlan based interleave groups
2957   /// based on \p IAI.
2958   void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
2959                   InterleavedAccessInfo &IAI);
2960 
2961 public:
2962   VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);
2963 
2964   ~VPInterleavedAccessInfo() {
2965     SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
2966     // Avoid releasing a pointer twice.
2967     for (auto &I : InterleaveGroupMap)
2968       DelSet.insert(I.second);
2969     for (auto *Ptr : DelSet)
2970       delete Ptr;
2971   }
2972 
2973   /// Get the interleave group that \p Instr belongs to.
2974   ///
2975   /// \returns nullptr if \p Instr does not belong to any such group.
2976   InterleaveGroup<VPInstruction> *
2977   getInterleaveGroup(VPInstruction *Instr) const {
2978     return InterleaveGroupMap.lookup(Instr);
2979   }
2980 };
2981 
2982 /// Class that maps (parts of) an existing VPlan to trees of combined
2983 /// VPInstructions.
2984 class VPlanSlp {
2985   enum class OpMode { Failed, Load, Opcode };
2986 
2987   /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
2988   /// DenseMap keys.
2989   struct BundleDenseMapInfo {
2990     static SmallVector<VPValue *, 4> getEmptyKey() {
2991       return {reinterpret_cast<VPValue *>(-1)};
2992     }
2993 
2994     static SmallVector<VPValue *, 4> getTombstoneKey() {
2995       return {reinterpret_cast<VPValue *>(-2)};
2996     }
2997 
2998     static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
2999       return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
3000     }
3001 
3002     static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
3003                         const SmallVector<VPValue *, 4> &RHS) {
3004       return LHS == RHS;
3005     }
3006   };
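  // The sentinel keys above follow the usual DenseMapInfo contract: the empty
  // and tombstone keys must differ from every real key (and from each other),
  // hence the invalid pointer values -1 and -2. A minimal sketch of the
  // contract for a hypothetical key type `Key`:
  //
  //   struct Info {
  //     static Key getEmptyKey();     // never equal to a real key
  //     static Key getTombstoneKey(); // never equal to a real key or empty
  //     static unsigned getHashValue(const Key &K);
  //     static bool isEqual(const Key &L, const Key &R);
  //   };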
3007 
3008   /// Mapping of values in the original VPlan to a combined VPInstruction.
3009   DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
3010       BundleToCombined;
3011 
3012   VPInterleavedAccessInfo &IAI;
3013 
3014   /// Basic block to operate on. For now, only instructions in a single BB are
3015   /// considered.
3016   const VPBasicBlock &BB;
3017 
3018   /// Indicates whether we managed to combine all visited instructions or not.
3019   bool CompletelySLP = true;
3020 
3021   /// Width of the widest combined bundle in bits.
3022   unsigned WidestBundleBits = 0;
3023 
3024   using MultiNodeOpTy =
3025       typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
3026 
3027   // Input operand bundles for the current multi node. Each multi node operand
3028   // bundle contains values not matching the multi node's opcode. They will
3029   // be reordered in reorderMultiNodeOps, once we have completed building a
3030   // multi node.
3031   SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
3032 
3033   /// Indicates whether we are building a multi node currently.
3034   bool MultiNodeActive = false;
3035 
3036   /// Check if we can vectorize Operands together.
3037   bool areVectorizable(ArrayRef<VPValue *> Operands) const;
3038 
3039   /// Add combined instruction \p New for the bundle \p Operands.
3040   void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
3041 
3042   /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
3043   VPInstruction *markFailed();
3044 
3045   /// Reorder operands in the multi node to maximize sequential memory access
3046   /// and commutative operations.
3047   SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
3048 
3049   /// Choose the best candidate to use for the lane after \p Last. The
3050   /// candidates to choose from are values with an opcode matching \p Last's
3051   /// or loads consecutive to \p Last.
3052   std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
3053                                        SmallPtrSetImpl<VPValue *> &Candidates,
3054                                        VPInterleavedAccessInfo &IAI);
3055 
3056 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3057   /// Print bundle \p Values to dbgs().
3058   void dumpBundle(ArrayRef<VPValue *> Values);
3059 #endif
3060 
3061 public:
3062   VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
3063 
3064   ~VPlanSlp() = default;
3065 
3066   /// Tries to build an SLP tree rooted at \p Operands and returns a
3067   /// VPInstruction combining \p Operands, if they can be combined.
3068   VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
3069 
3070   /// Return the width of the widest combined bundle in bits.
3071   unsigned getWidestBundleBits() const { return WidestBundleBits; }
3072 
3073   /// Return true if all visited instructions can be combined.
3074   bool isCompletelySLP() const { return CompletelySLP; }
3075 };
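// Illustrative sketch (hypothetical values): attempting to combine the bundle
// rooted at two scalar stores into a wider VPInstruction.
//
//   VPlanSlp Slp(VPIAI, *VPBB);
//   SmallVector<VPValue *, 4> StoreRoots = {Store1, Store2};
//   if (VPInstruction *Root = Slp.buildGraph(StoreRoots))
//     ...; // Root combines the bundle; also check Slp.isCompletelySLP()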
3076 
3077 namespace vputils {
3078 
3079 /// Returns true if only the first lane of \p Def is used.
3080 bool onlyFirstLaneUsed(VPValue *Def);
3081 
3082 /// Returns true if only the first part of \p Def is used.
3083 bool onlyFirstPartUsed(VPValue *Def);
3084 
3085 /// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
3086 /// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
3087 /// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
3088 /// pre-header already contains a recipe expanding \p Expr, return it. If not,
3089 /// create a new one.
3090 VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
3091                                        ScalarEvolution &SE);
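// Illustrative sketch (hypothetical `Plan`, `Expr`, `SE`): expanding a SCEV
// into the plan, reusing any expansion already in the pre-header.
//
//   VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, Expr, SE);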
3092 
3093 /// Returns true if \p VPV is uniform after vectorization.
3094 inline bool isUniformAfterVectorization(VPValue *VPV) {
3095   // A value defined outside any vector region is uniform after
3096   // vectorization when used inside a vector region.
3097   if (VPV->isDefinedOutsideVectorRegions())
3098     return true;
3099   VPRecipeBase *Def = VPV->getDefiningRecipe();
3100   assert(Def && "Must have definition for value defined inside vector region");
3101   if (auto *Rep = dyn_cast<VPReplicateRecipe>(Def))
3102     return Rep->isUniform();
3103   if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def))
3104     return all_of(GEP->operands(), isUniformAfterVectorization);
3105   return false;
3106 }
3107 } // end namespace vputils
3108 
3109 } // end namespace llvm
3110 
3111 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
3112