//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
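//
// For example (a sketch; the pseudo opcode and operand details are
// illustrative, not exact MIR), given a block whose first vector instruction
// is
//
//   %v = PseudoVADD_VV_M1 $noreg, %a, %b, %avl, e32, ta/ma
//
// phase 3 emits a configuration instruction ahead of it:
//
//   dead $x0 = PseudoVSETVLI %avl, e32, m1, ta, ma
//
// unless phase 2 proved that every predecessor already exits with VL/VTYPE
// in a compatible state.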
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

static cl::opt<bool> EnsureWholeVectorRegisterMoveValidVTYPE(
    DEBUG_TYPE "-whole-vector-register-move-valid-vtype", cl::Hidden,
    cl::desc("Insert vsetvlis before vmvNr.vs to ensure vtype is valid and "
             "vill is cleared"),
    cl::init(true));

namespace {

/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
/// This will return nullptr if the virtual register is an implicit_def or
/// if LiveIntervals is not available.
static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
                                const LiveIntervals *LIS) {
  assert(Reg.isVirtual());
  if (!LIS)
    return nullptr;
  auto &LI = LIS->getInterval(Reg);
  SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
  return LI.getVNInfoBefore(SI);
}

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}
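
// In assembly terms, the VL-preserving form matched above is:
//   vsetvli x0, x0, e32, m1, ta, ma   # keeps VL, only updates VTYPE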

static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VFMV_S_F:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isScalarExtractInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_X_S:
  case RISCV::VFMV_F_S:
    return true;
  }
}

static bool isScalarInsertInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}

/// Get the EEW for a load or store instruction.  Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

static bool isNonZeroLoadImmediate(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() &&
         MI.getOperand(2).isImm() && MI.getOperand(1).getReg() == RISCV::X0 &&
         MI.getOperand(2).getImm() != 0;
}
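
// A sketch of the pattern matched above, in MIR-like form:
//   %avl = ADDI $x0, 7   (materializes the constant 7, known to be non-zero)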

/// Return true if this is an operation on mask registers.  Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification.  Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedPassthru(const MachineInstr &MI) {
  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the passthrough
    // lanes are undefined.
    return true;

  // All undefined passthrus should be $noreg: see
  // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
}
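
// E.g. (an illustrative MIR shape, not exact syntax) a pseudo whose tied
// passthru operand is $noreg or is marked undef,
//   %v = PseudoVADD_VV_M1 $noreg, %a, %b, %avl, sew, policy
// has entirely undefined inactive lanes; with a real register passthru, the
// inactive lanes carry that register's values and must be respected.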

/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
static bool isVectorCopy(const TargetRegisterInfo *TRI,
                         const MachineInstr &MI) {
  return MI.isCopy() && MI.getOperand(0).getReg().isPhysical() &&
         RISCVRegisterInfo::isRVVRegClass(
             TRI->getMinimalPhysRegClass(MI.getOperand(0).getReg()));
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used.  If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3, // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqualAndLessThan64 =
        2, // SEW can be changed as long as it's greater
           // than or equal to the original value, but must be less
           // than 64.
    SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWNone = 0                // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  enum : uint8_t {
    LMULEqual = 2, // The exact value of LMUL needs to be preserved.
    LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1.
    LMULNone = 0                 // We don't need to preserve LMUL at all.
  } LMUL = LMULNone;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;
  // If this is true, we demand that VTYPE is set to some legal state, i.e. that
  // vill is unset.
  bool VILL = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;
  }

  // Return true if any property of VL was used
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = LMULEqual;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
    VILL = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

  static DemandedFields all() {
    DemandedFields DF;
    DF.demandVTYPE();
    DF.demandVL();
    return DF;
  }

  // Make this the result of demanding both the fields in this and B.
  void doUnion(const DemandedFields &B) {
    VLAny |= B.VLAny;
    VLZeroness |= B.VLZeroness;
    SEW = std::max(SEW, B.SEW);
    LMUL = std::max(LMUL, B.LMUL);
    SEWLMULRatio |= B.SEWLMULRatio;
    TailPolicy |= B.TailPolicy;
    MaskPolicy |= B.MaskPolicy;
    VILL |= B.VILL;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    }
    OS << ", ";
    OS << "LMUL=";
    switch (LMUL) {
    case LMULEqual:
      OS << "LMULEqual";
      break;
    case LMULLessThanOrEqualToM1:
      OS << "LMULLessThanOrEqualToM1";
      break;
    case LMULNone:
      OS << "LMULNone";
      break;
    }
    OS << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy << ", ";
    OS << "VILL=" << VILL;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}
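
// E.g. mf8, mf4, mf2 and m1 all qualify as LMUL <= 1; m2, m4 and m8 do not.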

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  switch (Used.LMUL) {
  case DemandedFields::LMULNone:
    break;
  case DemandedFields::LMULEqual:
    if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
      return false;
    break;
  case DemandedFields::LMULLessThanOrEqualToM1:
    if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType)))
      return false;
    break;
  }

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}
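
// For example, if Used demands only SEWLMULRatio (as for loads and stores
// with an encoded EEW), then e32/m1 and e64/m2 are compatible: both have a
// SEW/LMUL ratio of 32, and therefore the same VLMAX.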

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
  // This function is also used by coalesceVSETVLIs. We can still use the
  // value of a SEW, VL, or Policy operand even though it might not be the
  // exact value in the VL or VTYPE, since we only care about what the
  // instruction originally demanded.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
          !VLOp.isReg() || !VLOp.isUndef())
        Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode.  This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }
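
  // E.g. a vmand.mm executed under e8/m1 gives the same result as under
  // e64/m8: mask ops work on single-bit elements, so only VL and VLMAX
  // (i.e. the SEW/LMUL ratio, 8 in both cases) matter.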

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarInsertInstr(MI)) {
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the passthru is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype, and can disregard policy bits.  Warning: It's tempting to try
    // doing this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1.  We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedPassthru(MI)) {
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
  if (isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    // A slidedown/slideup with an *undefined* passthru can freely clobber
    // elements not copied from the source vector (e.g. masked off, tail, or
    // slideup's prefix). Notes:
    // * We can't modify SEW here since the slide amount is in units of SEW.
    // * VL=1 is special only because we have existing support for zero vs
    //   non-zero VL.  We could generalize this if we had a VL > C predicate.
    // * The LMUL1 restriction is for machines whose latency may depend on VL.
    // * As above, this is only legal for tail "undefined" not "agnostic".
    if (isVSlideInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
        hasUndefinedPassthru(MI)) {
      Res.VLAny = false;
      Res.VLZeroness = true;
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.TailPolicy = false;
    }

    // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated the
    // same semantically as vmv.s.x.  This is particularly useful since we
    // don't have an immediate form of vmv.s.x, and thus frequently use
    // vmv.v.i in its place. Since a splat is non-constant time in LMUL, we
    // do need to be careful to not increase the number of active vector
    // registers (unlike for vmv.s.x).
    if (isScalarSplatInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
        hasUndefinedPassthru(MI)) {
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.SEWLMULRatio = false;
      Res.VLAny = false;
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }
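
  // E.g. (a sketch) "vmv.v.i v8, 5" with VL=1 and an undefined passthru
  // writes exactly one element, just as an immediate form of vmv.s.x would
  // if one existed, so any SEW >= the original and any LMUL <= m1 gives the
  // same result.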

  // In §32.16.6, whole vector register moves have a dependency on SEW. At the
  // MIR level though we don't encode the element type, and it gives the same
  // result whatever the SEW may be.
  //
  // However it does need valid SEW, i.e. vill must be cleared. The entry to a
  // function, calls and inline assembly may all set it, so make sure we clear
  // it for whole register copies. Do this by leaving VILL demanded.
  if (isVectorCopy(ST->getRegisterInfo(), MI)) {
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEW = DemandedFields::SEWNone;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  struct AVLDef {
    // Every AVLDef should have a VNInfo, unless we're running without
    // LiveIntervals in which case this will be nullptr.
    const VNInfo *ValNo;
    Register DefReg;
  };
  union {
    AVLDef AVLRegDef;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    AVLIsVLMAX,
    Unknown, // AVL and VTYPE are fully unknown
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
    assert(AVLReg.isVirtual());
    AVLRegDef.ValNo = VNInfo;
    AVLRegDef.DefReg = AVLReg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  void setAVLVLMAX() { State = AVLIsVLMAX; }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
  Register getAVLReg() const {
    assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
    return AVLRegDef.DefReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  const VNInfo *getAVLVNInfo() const {
    assert(hasAVLReg());
    return AVLRegDef.ValNo;
  }
  // Most AVLIsReg infos will have a single defining MachineInstr, unless it was
  // a PHI node. In that case getAVLVNInfo()->def will point to the block
  // boundary slot and this will return nullptr.  If LiveIntervals isn't
  // available, nullptr is also returned.
  const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
    assert(hasAVLReg());
    if (!LIS || getAVLVNInfo()->isPHIDef())
      return nullptr;
    auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
    assert(MI);
    return MI;
  }

  void setAVL(const VSETVLIInfo &Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
    else if (Info.hasAVLVLMAX())
      setAVLVLMAX();
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL(const LiveIntervals *LIS) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (auto *DefMI = getAVLDefMI(LIS))
        return isNonZeroLoadImmediate(*DefMI);
    }
    if (hasAVLVLMAX())
      return true;
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const LiveIntervals *LIS) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
  }

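  // E.g. immediate AVLs of 4 and 8 are "equally zero" (both provably
  // non-zero) even though the values differ; two known-zero AVLs compare
  // equal via hasSameAVL.
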
  bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return getAVLReg() == Other.getAVLReg();
      return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
             getAVLReg() == Other.getAVLReg();
    }

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    if (hasAVLVLMAX())
      return Other.hasAVLVLMAX() && hasSameVLMAX(Other);

    return false;
  }

  // Return true if the two lattice values are guaranteed to have
  // the same AVL value at runtime.
  bool hasSameAVL(const VSETVLIInfo &Other) const {
    // Without LiveIntervals, we don't know which instruction defines a
    // register.  Since a register may be redefined, this means all AVLIsReg
    // states must be treated as possibly distinct.
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return false;
    }
    return hasSameAVLLatticeValue(Other);
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this.  MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const LiveIntervals *LIS) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVLLatticeValue(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }
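
  // For example, intersecting predecessor exits {AVLReg=%x, SEW=32, m1} and
  // {AVLReg=%x, SEW=64, m2} (same AVL, same SEW/LMUL ratio of 32) yields the
  // first state with SEWLMULRatioOnly set: only VL/VLMAX is known to agree.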

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << llvm::printReg(getAVLReg());
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    if (hasAVLVLMAX())
      OS << "AVLVLMAX";
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  // Possibly null!
  LiveIntervals *LIS;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
    AU.addPreserved<LiveIntervalsWrapperPass>();
    AU.addPreserved<SlotIndexesWrapperPass>();
    AU.addPreserved<LiveDebugVariablesWrapperLegacy>();
    AU.addPreserved<LiveStacksWrapperLegacy>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);

  bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
                            const DemandedFields &Used) const;
  void coalesceVSETVLIs(MachineBasicBlock &MBB) const;

  VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
  VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
  void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;
char &llvm::RISCVInsertVSETVLIID = RISCVInsertVSETVLI::ID;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

// If the AVL is defined by a vsetvli's output vl with the same VLMAX, we can
// replace the AVL operand with the AVL of the defining vsetvli. E.g.
//
// %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
// $x0 = PseudoVSETVLI %vl:gpr, SEW=32, LMUL=M1
// ->
// %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
// $x0 = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
void RISCVInsertVSETVLI::forwardVSETVLIAVL(VSETVLIInfo &Info) const {
  if (!Info.hasAVLReg())
    return;
  const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;
  VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
  if (!DefInstrInfo.hasSameVLMAX(Info))
    return;
  Info.setAVL(DefInstrInfo);
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
VSETVLIInfo
RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    if (AVLReg == RISCV::X0)
      NewInfo.setAVLVLMAX();
    else if (MI.getOperand(1).isUndef())
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      NewInfo.setAVLImm(1);
    else {
      VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
      NewInfo.setAVLRegDef(VNI, AVLReg);
    }
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  forwardVSETVLIAVL(NewInfo);

  return NewInfo;
}

static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVII::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}
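
// E.g. with VLEN=128, SEW=32 and LMUL=m2, VLMAX = (128 * 2) / 32 = 8; with
// fractional LMUL=mf2 instead, VLMAX = (128 / 2) / 32 = 2.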

VSETVLIInfo
RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
  VSETVLIInfo InstrInfo;
  const uint64_t TSFlags = MI.getDesc().TSFlags;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedPassthru(MI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead.  This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST->getRealMaxVLen(), SEW, VLMul);
        if (ST->getRealMinVLen() == ST->getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLVLMAX();
      } else
        InstrInfo.setAVLImm(Imm);
    } else if (VLOp.isUndef()) {
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      InstrInfo.setAVLImm(1);
    } else {
      VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
      InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
    }
  } else {
    assert(isScalarExtractInstr(MI));
    // Pick an arbitrary value for state tracking purposes; it will be
    // ignored via the demanded fields mechanism.
    InstrInfo.setAVLImm(1);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  forwardVSETVLIAVL(InstrInfo);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                    .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                    .addReg(RISCV::X0, RegState::Kill)
                    .addImm(Info.encodeVTYPE())
                    .addReg(RISCV::VL, RegState::Implicit);
      if (LIS)
        LIS->InsertMachineInstrInMaps(*MI);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
      if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
          DefMI && isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
          auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                        .addReg(RISCV::X0, RegState::Kill)
                        .addImm(Info.encodeVTYPE())
                        .addReg(RISCV::VL, RegState::Implicit);
          if (LIS)
            LIS->InsertMachineInstrInMaps(*MI);
          return;
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
                  .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                  .addImm(Info.getAVLImm())
                  .addImm(Info.encodeVTYPE());
    if (LIS)
      LIS->InsertMachineInstrInMaps(*MI);
    return;
  }

  if (Info.hasAVLVLMAX()) {
    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                  .addReg(DestReg, RegState::Define | RegState::Dead)
                  .addReg(RISCV::X0, RegState::Kill)
                  .addImm(Info.encodeVTYPE());
    if (LIS) {
      LIS->InsertMachineInstrInMaps(*MI);
      LIS->createAndComputeVirtRegInterval(DestReg);
    }
    return;
  }

  Register AVLReg = Info.getAVLReg();
  MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
  auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(AVLReg)
                .addImm(Info.encodeVTYPE());
  if (LIS) {
    LIS->InsertMachineInstrInMaps(*MI);
    LiveInterval &LI = LIS->getInterval(AVLReg);
    SlotIndex SI = LIS->getInstructionIndex(*MI).getRegSlot();
    // If the AVL value isn't live at MI, do a quick check to see if it's easily
    // extendable. Otherwise, we need to copy it.
    if (LI.getVNInfoBefore(SI) != Info.getAVLVNInfo()) {
      if (!LI.liveAt(SI) && LI.containsOneValue())
        LIS->extendToIndices(LI, SI);
      else {
        Register AVLCopyReg =
            MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
        MachineBasicBlock::iterator II;
        if (Info.getAVLVNInfo()->isPHIDef())
          II = LIS->getMBBFromIndex(Info.getAVLVNInfo()->def)->getFirstNonPHI();
        else {
          II = LIS->getInstructionFromIndex(Info.getAVLVNInfo()->def);
          II = std::next(II);
        }
        assert(II.isValid());
        auto AVLCopy =
            BuildMI(*II->getParent(), II, DL, TII->get(RISCV::COPY), AVLCopyReg)
                .addReg(AVLReg);
        LIS->InsertMachineInstrInMaps(*AVLCopy);
        MI->getOperand(1).setReg(AVLCopyReg);
        LIS->createAndComputeVirtRegInterval(AVLCopyReg);
      }
    }
  }
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// given a set of DemandedFields \p Used.
bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  if (CurInfo.isCompatible(Used, Require, LIS))
    return false;

  return true;
}

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
static VSETVLIInfo adjustIncoming(const VSETVLIInfo &PrevInfo,
                                  const VSETVLIInfo &NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = DemandedFields::LMULEqual;
  }

  return Info;
}
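
// For example, if the previous state is e32/m1 (ratio 32) and the incoming
// instruction needs SEW=64 but demands neither LMUL nor the ratio, we pick
// e64/m2 to keep the ratio at 32, so VLMAX is unchanged and the VL-preserving
// "x0, x0" form stays usable.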

// Given an incoming state reaching MI, minimally modifies that state so that it
// is compatible with MI. The resulting state is guaranteed to be semantically
// legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  if (isVectorCopy(ST->getRegisterInfo(), MI) &&
      (Info.isUnknown() || !Info.isValid() || Info.hasSEWLMULRatioOnly())) {
    // Use an arbitrary but valid AVL and VTYPE so vill will be cleared. It may
    // be coalesced into another vsetvli since we won't demand any fields.
    VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
    NewInfo.setAVLImm(1);
    NewInfo.setVTYPE(RISCVII::VLMUL::LMUL_1, /*sew*/ 8, /*ta*/ true,
                     /*ma*/ true);
    Info = NewInfo;
    return;
  }

  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return;

  DemandedFields Demanded = getDemanded(MI, ST);

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(Demanded, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs.  This removes a vsetvli entirely if the types
  // match, or allows use of the cheaper AVL-preserving variant if VLMAX
  // doesn't change. If VLMAX might change, we couldn't use the
  // 'vsetvli x0, x0, vtype' variant, so we avoid the transform to prevent
  // extending the live range of an AVL register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
      // if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    assert(MI.getOperand(1).getReg().isVirtual());
    if (LIS) {
      auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
      SlotIndex SI =
          LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
      VNInfo *VNI = LI.getVNInfoAt(SI);
      Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
    } else
      Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
      MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
        isVectorCopy(ST->getRegisterInfo(), MI))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based
  // on the input state.  To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL was a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (!Require.hasAVLReg())
    return true;

  if (!LIS)
    return true;

  // We need the AVL to have been produced by a PHI node in this basic block.
  const VNInfo *Valno = Require.getAVLVNInfo();
  if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB)
    return true;

  const LiveRange &LR = LIS->getInterval(Require.getAVLReg());

  for (auto *PBB : MBB.predecessors()) {
    const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;

    // We need the PHI input to be the output of a VSET(I)VLI.
    const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB));
    if (!Value)
      return true;
    MachineInstr *DefMI = LIS->getInstructionFromIndex(Value->def);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (DefInfo != PBBExit)
      return true;

    // Require has the same VL as PBBExit, so if the exit from the
    // predecessor has the VTYPE we are looking for we might be able
    // to avoid a VSETVLI.
    if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}
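
// A sketch of the CFG shape needVSETVLIPHI recognizes:
//
//   bb.1:                             bb.2:
//     %a = PseudoVSETVLI ...            %b = PseudoVSETVLI ...
//     (exit state S)                    (exit state S)
//            \                           /
//             v                         v
//   bb.3:
//     %avl = PHI [ %a, %bb.1 ], [ %b, %bb.2 ]
//     ... vector op with AVL=%avl and a VTYPE matching S ...
//
// Every PHI input is the VL output of the vsetvli that produced its
// predecessor's exit state, so no vsetvli is needed at the top of bb.3.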
1457 
1458 void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
1459   VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning it has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    if (EnsureWholeVectorRegisterMoveValidVTYPE &&
        isVectorCopy(ST->getRegisterInfo(), MI)) {
      if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
        insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
        PrefixTransparent = false;
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had, since we know the GPR result of the implicit state change
        // wouldn't be used and the VL/VTYPE registers are correct.  Note
        // that we *do* need to model the state as if it changed, because
        // even though the register contents are unchanged, the abstract
        // model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
          if (LIS) {
            LiveInterval &LI = LIS->getInterval(Reg);
            SmallVector<MachineInstr *> DeadMIs;
            LIS->shrinkToUses(&LI, &DeadMIs);
            // We might have separate components that need to be split due to
            // needVSETVLIPHI causing us to skip inserting a new VL def.
            SmallVector<LiveInterval *> SplitLIs;
            LIS->splitSeparateComponents(LI, SplitLIs);

            // If the AVL was an immediate > 31, then it would have been emitted
            // as an ADDI. However, the ADDI might not have been used in the
            // vsetvli, or a vsetvli might not have been emitted, so it may be
            // dead now.
            for (MachineInstr *DeadMI : DeadMIs) {
              if (!TII->isAddImmediate(*DeadMI, Reg))
                continue;
              LIS->RemoveMachineInstrFromMaps(*DeadMI);
              DeadMI->eraseFromParent();
            }
          }
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
        MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  const auto &Info = BlockInfo[MBB.getNumber()];
  if (CurInfo != Info.Exit) {
    LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
    LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
    LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
    LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
  }
  assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors.  Specifically,
/// this is geared to catch the common case of a fixed-length vsetvli in a
/// single-block loop when it could execute once in the preheader instead.
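///
/// An illustrative sketch (operands abbreviated, not taken from an actual
/// test): given a single-block loop
///
///   loop:
///     vsetivli zero, 4, e32, m1, ta, ma
///     vadd.vv v8, v8, v9
///     bnez a1, loop
///
/// the vsetivli can instead execute once in the preheader:
///
///   preheader:
///     vsetivli zero, 4, e32, m1, ta, ma
///   loop:
///     vadd.vv v8, v8, v9
///     bnez a1, loop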
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  if (!LIS)
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
  // the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point where we're
  // going to insert the vsetvli.
  if (AvailableInfo.hasAVLReg()) {
    SlotIndex SI = AvailableInfo.getAVLVNInfo()->def;
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (LIS->getMBBFromIndex(SI) != UnavailablePred)
      return;
    if (!UnavailablePred->terminators().empty() &&
        SI >= LIS->getInstructionIndex(*UnavailablePred->getFirstTerminator()))
      return;
  }

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo.  We're looking for two issues here: one of legality,
  // one of profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state.  We cannot
  //    make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence.  All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both the dataflow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for the correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE.  Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}

// Return true if we can mutate PrevMI to match MI without changing any of
// the fields which would be observed.
bool RISCVInsertVSETVLI::canMutatePriorConfig(
    const MachineInstr &PrevMI, const MachineInstr &MI,
    const DemandedFields &Used) const {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
                                                       LIS))
        return false;
    }

    auto &AVL = MI.getOperand(1);

    // If the AVL is a register, we need to make sure its definition is the same
    // at PrevMI as it was at MI.
    if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
      VNInfo *VNI = getVNInfoFromReg(AVL.getReg(), MI, LIS);
      VNInfo *PrevVNI = getVNInfoFromReg(AVL.getReg(), PrevMI, LIS);
      if (!VNI || !PrevVNI || VNI != PrevVNI)
        return false;
    }
  }

  assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

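/// Do block-local coalescing of the vsetvli instructions in \p MBB by walking
/// backwards and removing or rewriting a config when none of the fields it
/// sets are demanded before the next config.  As an illustrative sketch
/// (operands abbreviated): if nothing between the two instructions below
/// reads VL or VTYPE, then in
///
///   vsetvli zero, a1, e32, m1, ta, ma
///   ...
///   vsetvli zero, a1, e32, m2, ta, ma
///
/// the first vsetvli is redundant and can be deleted.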
void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr *> ToDelete;

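  // Clear a virtual-register AVL use, shrink its live interval, and remember
  // a now-dead ADDI that materialized the AVL so it can be deleted later.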
  auto dropAVLUse = [&](MachineOperand &MO) {
    if (!MO.isReg() || !MO.getReg().isVirtual())
      return;
    Register OldVLReg = MO.getReg();
    MO.setReg(RISCV::NoRegister);

    if (LIS)
      LIS->shrinkToUses(&LIS->getInterval(OldVLReg));

    MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
    if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
        MRI->use_nodbg_empty(OldVLReg))
      ToDelete.push_back(VLOpDef);
  };

  for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
    if (!isVectorConfigInstr(MI)) {
      Used.doUnion(getDemanded(MI, ST));
      if (MI.isCall() || MI.isInlineAsm() ||
          MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
          MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
        NextMI = nullptr;
      continue;
    }

    if (!MI.getOperand(0).isDead())
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        dropAVLUse(MI.getOperand(1));
        if (LIS)
          LIS->RemoveMachineInstrFromMaps(MI);
        MI.eraseFromParent();
        NumCoalescedVSETVL++;
        // Leave NextMI unchanged
        continue;
      }

      if (canMutatePriorConfig(MI, *NextMI, Used)) {
        if (!isVLPreservingConfig(*NextMI)) {
          Register DefReg = NextMI->getOperand(0).getReg();

          MI.getOperand(0).setReg(DefReg);
          MI.getOperand(0).setIsDead(false);

          // The def of DefReg moved to MI, so extend the LiveInterval up to
          // it.
          if (DefReg.isVirtual() && LIS) {
            LiveInterval &DefLI = LIS->getInterval(DefReg);
            SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
            VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
            LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
            DefLI.addSegment(S);
            DefVNI->def = MISlot;
            // Mark DefLI as spillable if it was previously unspillable
            DefLI.setWeight(0);

            // DefReg may have had no uses, in which case we need to shrink
            // the LiveInterval up to MI.
            LIS->shrinkToUses(&DefLI);
          }

          dropAVLUse(MI.getOperand(1));
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
                                              false);

          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());

        dropAVLUse(NextMI->getOperand(1));
        if (LIS)
          LIS->RemoveMachineInstrFromMaps(*NextMI);
        NextMI->eraseFromParent();
        NumCoalescedVSETVL++;
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI, ST);
  }

  // Loop over the dead AVL values, and delete them now.  This has
  // to be outside the above loop to avoid invalidating iterators.
  for (auto *MI : ToDelete) {
    if (LIS) {
      LIS->removeInterval(MI->getOperand(0).getReg());
      LIS->RemoveMachineInstrFromMaps(*MI);
    }
    MI->eraseFromParent();
  }
}

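/// Expand the vl output of each fault-only-first load (VLEFF/VLSEGFF) in
/// \p MBB into a PseudoReadVL placed immediately after the load, and rewrite
/// the load's vl output operand to X0.  Roughly (illustrative MIR, opcode
/// abbreviated):
///
///   %vd, %vl = PseudoVLE32FF ...
///
/// becomes
///
///   %vd, $x0 = PseudoVLE32FF ...
///   %vl = PseudoReadVL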
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      assert(VLOutput.isVirtual());
      if (!MI.getOperand(1).isDead()) {
        auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(),
                                TII->get(RISCV::PseudoReadVL), VLOutput);
        // Move the LiveInterval's definition down to PseudoReadVL.
        if (LIS) {
          SlotIndex NewDefSI =
              LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
          LiveInterval &DefLI = LIS->getInterval(VLOutput);
          VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
          DefLI.removeSegment(DefLI.beginIndex(), NewDefSI);
          DefVNI->def = NewDefSI;
        }
      }
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();
  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    VSETVLIInfo TmpStatus;
    HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = TmpStatus;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block-local
  // DSE and peephole-based demanded-fields transforms.  Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross-block analysis within the dataflow.  We can't have both
  // demanded-fields-based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    coalesceVSETVLIs(MBB);

  // Insert PseudoReadVL after VLEFF/VLSEGFF and use it to replace the vl
  // output of VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}