//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
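// To make the end result concrete, here is a sketch (MIR-like, operand lists
// simplified and not drawn from a real test). A block containing only
//
//   %v = PseudoVADD_VV_M1 %a, %b, %avl, 5    (5 = log2(SEW), i.e. SEW=32)
//
// becomes, roughly:
//
//   dead $x0 = PseudoVSETVLI %avl, 80, implicit-def $vl, implicit-def $vtype
//   %v = PseudoVADD_VV_M1 %a, %b, $noreg, 5, implicit $vl, implicit $vtype
//
// where the immediate 80 encodes e32/m1 with a tail agnostic policy.
//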
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

namespace {

class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t MaskRegOp : 1;
  uint8_t StoreOp : 1;
  uint8_t ScalarMovOp : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false), MaskRegOp(false),
        StoreOp(false), ScalarMovOp(false), SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  bool hasZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() == 0;
    return false;
  }
  bool hasNonZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg())
      return getAVLReg() == RISCV::X0;
    return false;
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare AVL in unknown state");
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA, bool MRO,
                bool IsStore, bool IsScalarMovOp) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
    MaskRegOp = MRO;
    StoreOp = IsStore;
    ScalarMovOp = IsScalarMovOp;
  }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }
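
  // For reference (a sketch of the RVV 1.0 vtype layout, which is what
  // RISCVVType::encodeVTYPE produces): vlmul sits in bits [2:0], vsew in
  // bits [5:3], vta in bit 6 and vma in bit 7. Worked example: e32/m1,
  // tail agnostic, mask undisturbed gives vsew=0b010, vlmul=0b000, vta=1,
  // vma=0, i.e. 0b01010000 = 80.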

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameSEW(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return SEW == Other.SEW;
  }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
    unsigned LMul;
    bool Fractional;
    std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);

    // Convert LMul to a fixed point value with 3 fractional bits.
    LMul = Fractional ? (8 / LMul) : (LMul * 8);

    assert(SEW >= 8 && "Unexpected SEW value");
    return (SEW * 8) / LMul;
  }
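
  // A worked example of the fixed-point math above: SEW=16 with LMUL=1/2
  // decodes to LMul=2, Fractional=true, so the fixed-point LMUL is 8/2 = 4
  // and the ratio is (16*8)/4 = 32, matching SEW/LMUL = 16/(1/2) = 32.
  // Equal ratios imply equal VLMAX on the same hardware, since
  // VLMAX = VLEN*LMUL/SEW = VLEN/ratio.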

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPEs for these two VSETVLIInfos produce the same VLMAX.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasSamePolicy(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return TailAgnostic == Other.TailAgnostic &&
           MaskAgnostic == Other.MaskAgnostic;
  }

  bool hasCompatibleVTYPE(const VSETVLIInfo &InstrInfo, bool Strict) const {
    // Simple case, see if full VTYPE matches.
    if (hasSameVTYPE(InstrInfo))
      return true;

    if (Strict)
      return false;

    // If this is a mask reg operation, it only cares about VLMAX.
    // FIXME: Mask reg operations are probably ok if "this" VLMAX is larger
    // than "InstrInfo".
    // FIXME: The policy bits can probably be ignored for mask reg operations.
    if (InstrInfo.MaskRegOp && hasSameVLMAX(InstrInfo) &&
        TailAgnostic == InstrInfo.TailAgnostic &&
        MaskAgnostic == InstrInfo.MaskAgnostic)
      return true;

    return false;
  }

  // Determine whether the vector instruction's requirements, represented by
  // InstrInfo, are compatible with the previous vsetvli instruction
  // represented by this.
  bool isCompatible(const VSETVLIInfo &InstrInfo, bool Strict) const {
    assert(isValid() && InstrInfo.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!InstrInfo.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || InstrInfo.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    // If the instruction doesn't need an AVLReg and the SEW matches, consider
    // it compatible.
    if (!Strict && InstrInfo.hasAVLReg() &&
        InstrInfo.AVLReg == RISCV::NoRegister) {
      if (SEW == InstrInfo.SEW)
        return true;
    }

    // For vmv.s.x and vfmv.s.f, there are only two behaviors: VL = 0 and
    // VL > 0. So they are compatible whenever we can be sure both VLs are in
    // the same situation.
    if (!Strict && InstrInfo.ScalarMovOp && InstrInfo.hasAVLImm() &&
        ((hasNonZeroAVL() && InstrInfo.hasNonZeroAVL()) ||
         (hasZeroAVL() && InstrInfo.hasZeroAVL())) &&
        hasSameSEW(InstrInfo) && hasSamePolicy(InstrInfo))
      return true;

    // The AVL must match.
    if (!hasSameAVL(InstrInfo))
      return false;

    if (hasCompatibleVTYPE(InstrInfo, Strict))
      return true;

    // Strict matches must ensure a full VTYPE match.
    if (Strict)
      return false;

    // Store instructions don't use the policy fields.
    // TODO: Move into hasCompatibleVTYPE?
    if (InstrInfo.StoreOp && VLMul == InstrInfo.VLMul && SEW == InstrInfo.SEW)
      return true;

    // Anything else is not compatible.
    return false;
  }

  bool isCompatibleWithLoadStoreEEW(unsigned EEW,
                                    const VSETVLIInfo &InstrInfo) const {
    assert(isValid() && InstrInfo.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!InstrInfo.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    assert(EEW == InstrInfo.SEW && "Mismatched EEW/SEW for store");

    if (isUnknown() || hasSEWLMULRatioOnly())
      return false;

    if (!hasSameAVL(InstrInfo))
      return false;

    // Stores can ignore the tail and mask policies.
    if (!InstrInfo.StoreOp && (TailAgnostic != InstrInfo.TailAgnostic ||
                               MaskAgnostic != InstrInfo.MaskAgnostic))
      return false;

    return getSEWLMULRatio() == getSEWLMULRatio(EEW, InstrInfo.VLMul);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly && Other.SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    if (!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly)
      return hasSameVTYPE(Other);

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    return false;
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }
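
  // An illustrative meet, as a sketch: one predecessor exiting with
  // {AVL=%r, e32, m1, ta} and another with {AVL=%r, e16, mf2, tu} both have
  // SEW/LMUL ratio 32, so the intersection keeps the AVL but is marked
  // SEWLMULRatioOnly: successors may rely on VL/VLMAX being preserved, but
  // not on the exact SEW, LMUL, or policy bits.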

  // Calculate the VSETVLIInfo visible at the end of the block assuming this
  // is the predecessor value, and Other is the change made by this block.
  VSETVLIInfo merge(const VSETVLIInfo &Other) const {
    assert(isValid() && "Can only merge with a valid VSETVLIInfo");

    // Nothing changed from the predecessor, keep it.
    if (!Other.isValid())
      return *this;

    // If the change is compatible with the input, we won't create a VSETVLI
    // and should keep the predecessor.
    if (isCompatible(Other, /*Strict*/ true))
      return *this;

    // Otherwise just use whatever is in this block.
    return Other;
  }
};

struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() {}
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo);
  bool needVSETVLIPHI(const VSETVLIInfo &Require, const MachineBasicBlock &MBB);
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

static MachineInstr *elideCopies(MachineInstr *MI,
                                 const MachineRegisterInfo *MRI) {
  while (true) {
    if (!MI->isFullCopy())
      return MI;
    if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
      return nullptr;
    MI = MRI->getVRegDef(MI->getOperand(1).getReg());
    if (!MI)
      return nullptr;
  }
}

static bool isScalarMoveInstr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::PseudoVMV_S_X_M1:
  case RISCV::PseudoVMV_S_X_M2:
  case RISCV::PseudoVMV_S_X_M4:
  case RISCV::PseudoVMV_S_X_M8:
  case RISCV::PseudoVMV_S_X_MF2:
  case RISCV::PseudoVMV_S_X_MF4:
  case RISCV::PseudoVMV_S_X_MF8:
  case RISCV::PseudoVFMV_S_F16_M1:
  case RISCV::PseudoVFMV_S_F16_M2:
  case RISCV::PseudoVFMV_S_F16_M4:
  case RISCV::PseudoVFMV_S_F16_M8:
  case RISCV::PseudoVFMV_S_F16_MF2:
  case RISCV::PseudoVFMV_S_F16_MF4:
  case RISCV::PseudoVFMV_S_F32_M1:
  case RISCV::PseudoVFMV_S_F32_M2:
  case RISCV::PseudoVFMV_S_F32_M4:
  case RISCV::PseudoVFMV_S_F32_M8:
  case RISCV::PseudoVFMV_S_F32_MF2:
  case RISCV::PseudoVFMV_S_F64_M1:
  case RISCV::PseudoVFMV_S_F64_M2:
  case RISCV::PseudoVFMV_S_F64_M4:
  case RISCV::PseudoVFMV_S_F64_M8:
    return true;
  }
}

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;
  unsigned NumOperands = MI.getNumExplicitOperands();
  bool HasPolicy = RISCVII::hasVecPolicyOp(TSFlags);

  // Default to tail agnostic unless the destination is tied to a source. A
  // tied source gives the user some control over the tail values, unless
  // that source is undef. Some pseudo instructions force a tail agnostic
  // policy despite having a tied def.
  bool ForceTailAgnostic = RISCVII::doesForceTailAgnostic(TSFlags);
  bool TailAgnostic = true;
  // If the instruction has a policy argument, use it.
  if (HasPolicy) {
    const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
    TailAgnostic = Op.getImm() & 0x1;
  }

  unsigned UseOpIdx;
  if (!(ForceTailAgnostic || (HasPolicy && TailAgnostic)) &&
      MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
    TailAgnostic = false;
    // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
    if (UseMI) {
      UseMI = elideCopies(UseMI, MRI);
      if (UseMI && UseMI->isImplicitDef())
        TailAgnostic = true;
    }
  }

  // Remove the tail policy so we can find the SEW and VL.
  if (HasPolicy)
    --NumOperands;

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(NumOperands - 1).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  bool MaskRegOp = Log2SEW == 0;
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  // If there are no explicit defs, this is a store instruction which can
  // ignore the tail and mask policies.
  bool StoreOp = MI.getNumExplicitDefs() == 0;
  bool ScalarMovOp = isScalarMoveInstr(MI);

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(NumOperands - 2);
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel)
        InstrInfo.setAVLReg(RISCV::X0);
      else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else
    InstrInfo.setAVLReg(RISCV::NoRegister);
  InstrInfo.setVTYPE(VLMul, SEW, /*TailAgnostic*/ TailAgnostic,
                     /*MaskAgnostic*/ false, MaskRegOp, StoreOp, ScalarMovOp);

  return InstrInfo;
}
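
// To make the operand arithmetic above concrete, a hypothetical pseudo with
// a policy operand might be laid out as
//
//   %d = PseudoFOO %a, %b, %avl, log2sew, policy
//
// (illustrative only; real operand lists vary by pseudo). After the trailing
// policy operand is dropped from NumOperands, the SEW immediate sits at
// NumOperands-1 and the AVL operand at NumOperands-2, which is exactly what
// the code above indexes.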

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();

  // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
  // VLMAX.
  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
      Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
    BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLIX0))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill)
        .addImm(Info.encodeVTYPE())
        .addReg(RISCV::VL, RegState::Implicit);
    return;
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 0 to avoid depending on previous vl.
    BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(0)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, MI, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}
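
// For illustration, the forms emitted above correspond roughly to the
// following assembly (vtype spelled symbolically; a sketch, not taken from
// compiler output):
//
//   vsetvli x0, x0, e32,m1,ta,mu   # same AVL and VLMAX: swap vtype, keep vl
//   vsetivli x0, 4, e32,m1,ta,mu   # AVL is a small immediate
//   vsetvli x0, a0, e32,m1,ta,mu   # AVL in a register, result unused
//
// When the AVL register is X0 and the destination is a (dead) virtual
// register, "vsetvli rd, x0, vtype" sets VL to VLMAX per the specification.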

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) {
  if (CurInfo.isCompatible(Require, /*Strict*/ false))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VTYPE we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (!CurInfo.isUnknown() && Require.hasAVLReg() &&
      Require.getAVLReg().isVirtual() && !CurInfo.hasSEWLMULRatioOnly() &&
      CurInfo.hasCompatibleVTYPE(Require, /*Strict*/ false)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
          DefMI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
          DefMI->getOpcode() == RISCV::PseudoVSETIVLI) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo))
          return false;
      }
    }
  }

  return true;
}

bool canSkipVSETVLIForLoadStore(const MachineInstr &MI,
                                const VSETVLIInfo &Require,
                                const VSETVLIInfo &CurInfo) {
  unsigned EEW;
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::PseudoVLE8_V_M1:
  case RISCV::PseudoVLE8_V_M1_MASK:
  case RISCV::PseudoVLE8_V_M2:
  case RISCV::PseudoVLE8_V_M2_MASK:
  case RISCV::PseudoVLE8_V_M4:
  case RISCV::PseudoVLE8_V_M4_MASK:
  case RISCV::PseudoVLE8_V_M8:
  case RISCV::PseudoVLE8_V_M8_MASK:
  case RISCV::PseudoVLE8_V_MF2:
  case RISCV::PseudoVLE8_V_MF2_MASK:
  case RISCV::PseudoVLE8_V_MF4:
  case RISCV::PseudoVLE8_V_MF4_MASK:
  case RISCV::PseudoVLE8_V_MF8:
  case RISCV::PseudoVLE8_V_MF8_MASK:
  case RISCV::PseudoVLSE8_V_M1:
  case RISCV::PseudoVLSE8_V_M1_MASK:
  case RISCV::PseudoVLSE8_V_M2:
  case RISCV::PseudoVLSE8_V_M2_MASK:
  case RISCV::PseudoVLSE8_V_M4:
  case RISCV::PseudoVLSE8_V_M4_MASK:
  case RISCV::PseudoVLSE8_V_M8:
  case RISCV::PseudoVLSE8_V_M8_MASK:
  case RISCV::PseudoVLSE8_V_MF2:
  case RISCV::PseudoVLSE8_V_MF2_MASK:
  case RISCV::PseudoVLSE8_V_MF4:
  case RISCV::PseudoVLSE8_V_MF4_MASK:
  case RISCV::PseudoVLSE8_V_MF8:
  case RISCV::PseudoVLSE8_V_MF8_MASK:
  case RISCV::PseudoVSE8_V_M1:
  case RISCV::PseudoVSE8_V_M1_MASK:
  case RISCV::PseudoVSE8_V_M2:
  case RISCV::PseudoVSE8_V_M2_MASK:
  case RISCV::PseudoVSE8_V_M4:
  case RISCV::PseudoVSE8_V_M4_MASK:
  case RISCV::PseudoVSE8_V_M8:
  case RISCV::PseudoVSE8_V_M8_MASK:
  case RISCV::PseudoVSE8_V_MF2:
  case RISCV::PseudoVSE8_V_MF2_MASK:
  case RISCV::PseudoVSE8_V_MF4:
  case RISCV::PseudoVSE8_V_MF4_MASK:
  case RISCV::PseudoVSE8_V_MF8:
  case RISCV::PseudoVSE8_V_MF8_MASK:
  case RISCV::PseudoVSSE8_V_M1:
  case RISCV::PseudoVSSE8_V_M1_MASK:
  case RISCV::PseudoVSSE8_V_M2:
  case RISCV::PseudoVSSE8_V_M2_MASK:
  case RISCV::PseudoVSSE8_V_M4:
  case RISCV::PseudoVSSE8_V_M4_MASK:
  case RISCV::PseudoVSSE8_V_M8:
  case RISCV::PseudoVSSE8_V_M8_MASK:
  case RISCV::PseudoVSSE8_V_MF2:
  case RISCV::PseudoVSSE8_V_MF2_MASK:
  case RISCV::PseudoVSSE8_V_MF4:
  case RISCV::PseudoVSSE8_V_MF4_MASK:
  case RISCV::PseudoVSSE8_V_MF8:
  case RISCV::PseudoVSSE8_V_MF8_MASK:
    EEW = 8;
    break;
  case RISCV::PseudoVLE16_V_M1:
  case RISCV::PseudoVLE16_V_M1_MASK:
  case RISCV::PseudoVLE16_V_M2:
  case RISCV::PseudoVLE16_V_M2_MASK:
  case RISCV::PseudoVLE16_V_M4:
  case RISCV::PseudoVLE16_V_M4_MASK:
  case RISCV::PseudoVLE16_V_M8:
  case RISCV::PseudoVLE16_V_M8_MASK:
  case RISCV::PseudoVLE16_V_MF2:
  case RISCV::PseudoVLE16_V_MF2_MASK:
  case RISCV::PseudoVLE16_V_MF4:
  case RISCV::PseudoVLE16_V_MF4_MASK:
  case RISCV::PseudoVLSE16_V_M1:
  case RISCV::PseudoVLSE16_V_M1_MASK:
  case RISCV::PseudoVLSE16_V_M2:
  case RISCV::PseudoVLSE16_V_M2_MASK:
  case RISCV::PseudoVLSE16_V_M4:
  case RISCV::PseudoVLSE16_V_M4_MASK:
  case RISCV::PseudoVLSE16_V_M8:
  case RISCV::PseudoVLSE16_V_M8_MASK:
  case RISCV::PseudoVLSE16_V_MF2:
  case RISCV::PseudoVLSE16_V_MF2_MASK:
  case RISCV::PseudoVLSE16_V_MF4:
  case RISCV::PseudoVLSE16_V_MF4_MASK:
  case RISCV::PseudoVSE16_V_M1:
  case RISCV::PseudoVSE16_V_M1_MASK:
  case RISCV::PseudoVSE16_V_M2:
  case RISCV::PseudoVSE16_V_M2_MASK:
  case RISCV::PseudoVSE16_V_M4:
  case RISCV::PseudoVSE16_V_M4_MASK:
  case RISCV::PseudoVSE16_V_M8:
  case RISCV::PseudoVSE16_V_M8_MASK:
  case RISCV::PseudoVSE16_V_MF2:
  case RISCV::PseudoVSE16_V_MF2_MASK:
  case RISCV::PseudoVSE16_V_MF4:
  case RISCV::PseudoVSE16_V_MF4_MASK:
  case RISCV::PseudoVSSE16_V_M1:
  case RISCV::PseudoVSSE16_V_M1_MASK:
  case RISCV::PseudoVSSE16_V_M2:
  case RISCV::PseudoVSSE16_V_M2_MASK:
  case RISCV::PseudoVSSE16_V_M4:
  case RISCV::PseudoVSSE16_V_M4_MASK:
  case RISCV::PseudoVSSE16_V_M8:
  case RISCV::PseudoVSSE16_V_M8_MASK:
  case RISCV::PseudoVSSE16_V_MF2:
  case RISCV::PseudoVSSE16_V_MF2_MASK:
  case RISCV::PseudoVSSE16_V_MF4:
  case RISCV::PseudoVSSE16_V_MF4_MASK:
    EEW = 16;
    break;
  case RISCV::PseudoVLE32_V_M1:
  case RISCV::PseudoVLE32_V_M1_MASK:
  case RISCV::PseudoVLE32_V_M2:
  case RISCV::PseudoVLE32_V_M2_MASK:
  case RISCV::PseudoVLE32_V_M4:
  case RISCV::PseudoVLE32_V_M4_MASK:
  case RISCV::PseudoVLE32_V_M8:
  case RISCV::PseudoVLE32_V_M8_MASK:
  case RISCV::PseudoVLE32_V_MF2:
  case RISCV::PseudoVLE32_V_MF2_MASK:
  case RISCV::PseudoVLSE32_V_M1:
  case RISCV::PseudoVLSE32_V_M1_MASK:
  case RISCV::PseudoVLSE32_V_M2:
  case RISCV::PseudoVLSE32_V_M2_MASK:
  case RISCV::PseudoVLSE32_V_M4:
  case RISCV::PseudoVLSE32_V_M4_MASK:
  case RISCV::PseudoVLSE32_V_M8:
  case RISCV::PseudoVLSE32_V_M8_MASK:
  case RISCV::PseudoVLSE32_V_MF2:
  case RISCV::PseudoVLSE32_V_MF2_MASK:
  case RISCV::PseudoVSE32_V_M1:
  case RISCV::PseudoVSE32_V_M1_MASK:
  case RISCV::PseudoVSE32_V_M2:
  case RISCV::PseudoVSE32_V_M2_MASK:
  case RISCV::PseudoVSE32_V_M4:
  case RISCV::PseudoVSE32_V_M4_MASK:
  case RISCV::PseudoVSE32_V_M8:
  case RISCV::PseudoVSE32_V_M8_MASK:
  case RISCV::PseudoVSE32_V_MF2:
  case RISCV::PseudoVSE32_V_MF2_MASK:
  case RISCV::PseudoVSSE32_V_M1:
  case RISCV::PseudoVSSE32_V_M1_MASK:
  case RISCV::PseudoVSSE32_V_M2:
  case RISCV::PseudoVSSE32_V_M2_MASK:
  case RISCV::PseudoVSSE32_V_M4:
  case RISCV::PseudoVSSE32_V_M4_MASK:
  case RISCV::PseudoVSSE32_V_M8:
  case RISCV::PseudoVSSE32_V_M8_MASK:
  case RISCV::PseudoVSSE32_V_MF2:
  case RISCV::PseudoVSSE32_V_MF2_MASK:
    EEW = 32;
    break;
  case RISCV::PseudoVLE64_V_M1:
  case RISCV::PseudoVLE64_V_M1_MASK:
  case RISCV::PseudoVLE64_V_M2:
  case RISCV::PseudoVLE64_V_M2_MASK:
  case RISCV::PseudoVLE64_V_M4:
  case RISCV::PseudoVLE64_V_M4_MASK:
  case RISCV::PseudoVLE64_V_M8:
  case RISCV::PseudoVLE64_V_M8_MASK:
  case RISCV::PseudoVLSE64_V_M1:
  case RISCV::PseudoVLSE64_V_M1_MASK:
  case RISCV::PseudoVLSE64_V_M2:
  case RISCV::PseudoVLSE64_V_M2_MASK:
  case RISCV::PseudoVLSE64_V_M4:
  case RISCV::PseudoVLSE64_V_M4_MASK:
  case RISCV::PseudoVLSE64_V_M8:
  case RISCV::PseudoVLSE64_V_M8_MASK:
  case RISCV::PseudoVSE64_V_M1:
  case RISCV::PseudoVSE64_V_M1_MASK:
  case RISCV::PseudoVSE64_V_M2:
  case RISCV::PseudoVSE64_V_M2_MASK:
  case RISCV::PseudoVSE64_V_M4:
  case RISCV::PseudoVSE64_V_M4_MASK:
  case RISCV::PseudoVSE64_V_M8:
  case RISCV::PseudoVSE64_V_M8_MASK:
  case RISCV::PseudoVSSE64_V_M1:
  case RISCV::PseudoVSSE64_V_M1_MASK:
  case RISCV::PseudoVSSE64_V_M2:
  case RISCV::PseudoVSSE64_V_M2_MASK:
  case RISCV::PseudoVSSE64_V_M4:
  case RISCV::PseudoVSSE64_V_M4_MASK:
  case RISCV::PseudoVSSE64_V_M8:
  case RISCV::PseudoVSSE64_V_M8_MASK:
    EEW = 64;
    break;
  }

  return CurInfo.isCompatibleWithLoadStoreEEW(EEW, Require);
}
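
// A worked example of the EMUL=(EEW/SEW)*LMUL rule: with vtype e32,m2 in
// effect (SEW/LMUL ratio (32*8)/16 = 16), PseudoVLE8_V_MF2 has EEW=8 and
// EMUL=(8/32)*2 = 1/2, giving the same ratio 8/(1/2) = 16. VLMAX is
// therefore unchanged, so as long as the AVL matches (and, for loads, the
// policy bits match too), the vsetvli can be skipped.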

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
  bool HadVectorOp = false;

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
  for (const MachineInstr &MI : MBB) {
    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
        MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
        MI.getOpcode() == RISCV::PseudoVSETIVLI) {
      HadVectorOp = true;
      BBInfo.Change = getInfoForVSETVLI(MI);
      continue;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      HadVectorOp = true;

      VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);

      if (!BBInfo.Change.isValid()) {
        BBInfo.Change = NewInfo;
      } else {
        // If this instruction isn't compatible with the previous VL/VTYPE
        // we need to insert a VSETVLI.
        // If this is a unit-stride or strided load/store, we may be able to use
        // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
        // NOTE: We only do this if the vtype we're comparing against was
        // created in this block. We need the first and third phase to treat
        // the store the same way.
        if (!canSkipVSETVLIForLoadStore(MI, NewInfo, BBInfo.Change) &&
            needVSETVLI(NewInfo, BBInfo.Change))
          BBInfo.Change = NewInfo;
      }
    }

    // If this is something that updates VL/VTYPE that we don't know about, set
    // the state to unknown.
    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE)) {
      BBInfo.Change = VSETVLIInfo::getUnknown();
    }
  }

  // Initial exit state is whatever change we found in the block.
  BBInfo.Exit = BBInfo.Change;

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  VSETVLIInfo InInfo;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  BBInfo.Pred = InInfo;

  VSETVLIInfo TmpStatus = BBInfo.Pred.merge(BBInfo.Change);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue)
      WorkList.push(S);
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() ||
        !PBBInfo.Exit.hasCompatibleVTYPE(Require, /*Strict*/ false))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || (DefMI->getOpcode() != RISCV::PseudoVSETVLI &&
                   DefMI->getOpcode() != RISCV::PseudoVSETVLIX0 &&
                   DefMI->getOpcode() != RISCV::PseudoVSETIVLI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}
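
// As an illustrative sketch of the case the phi check above catches:
//
//   bb.1: %vl1 = PseudoVSETVLI %avl1, 80, ...   ; last vsetvli in bb.1
//   bb.2: %vl2 = PseudoVSETVLI %avl2, 80, ...   ; last vsetvli in bb.2
//   bb.3: %phi = PHI %vl1, %bb.1, %vl2, %bb.2
//         ... vector op with AVL=%phi and a vtype compatible with 80 ...
//
// Each incoming value is the VL output of the vsetvli that defines its
// predecessor's exit state, so VL/VTYPE are already correct on every path
// into bb.3 and no new vsetvli is needed for the phi use.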

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo;
  // BBLocalInfo tracks the VL/VTYPE state the same way BBInfo.Change was
  // calculated in computeVLVTYPEChanges. We need this to apply
  // canSkipVSETVLIForLoadStore the same way computeVLVTYPEChanges did. We
  // can't include predecessor information in that decision, to avoid
  // disagreeing with the global analysis.
  VSETVLIInfo BBLocalInfo;
  // Only set if the current VSETVLIInfo came from an explicit VSET(I)VLI.
  MachineInstr *PrevVSETVLIMI = nullptr;

  for (MachineInstr &MI : MBB) {
    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
        MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
        MI.getOpcode() == RISCV::PseudoVSETIVLI) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      CurInfo = getInfoForVSETVLI(MI);
      BBLocalInfo = getInfoForVSETVLI(MI);
      PrevVSETVLIMI = &MI;
      continue;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
      if (RISCVII::hasVLOp(TSFlags)) {
        unsigned Offset = 2;
        if (RISCVII::hasVecPolicyOp(TSFlags))
          Offset = 3;
        MachineOperand &VLOp =
            MI.getOperand(MI.getNumExplicitOperands() - Offset);
        if (VLOp.isReg()) {
          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));

      if (!CurInfo.isValid()) {
        // We haven't found any vector instructions or VL/VTYPE changes yet,
        // use the predecessor information.
        assert(BlockInfo[MBB.getNumber()].Pred.isValid() &&
               "Expected a valid predecessor state.");
        // Don't use predecessor information if there was an earlier instruction
        // in this block that allowed a vsetvli to be skipped for load/store.
        if (!(BBLocalInfo.isValid() &&
              canSkipVSETVLIForLoadStore(MI, NewInfo, BBLocalInfo)) &&
            needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) &&
            needVSETVLIPHI(NewInfo, MBB)) {
          insertVSETVLI(MBB, MI, NewInfo, BlockInfo[MBB.getNumber()].Pred);
          CurInfo = NewInfo;
          BBLocalInfo = NewInfo;
        }

        // We must update BBLocalInfo for every vector instruction.
        if (!BBLocalInfo.isValid())
          BBLocalInfo = NewInfo;
      } else {
        assert(BBLocalInfo.isValid());
        // If this instruction isn't compatible with the previous VL/VTYPE
        // we need to insert a VSETVLI.
        // If this is a unit-stride or strided load/store, we may be able to use
        // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
        // NOTE: We can't use predecessor information for the store. We must
        // treat it the same as the first phase so that we produce the correct
        // vl/vtype for successor blocks.
        if (!canSkipVSETVLIForLoadStore(MI, NewInfo, CurInfo) &&
            needVSETVLI(NewInfo, CurInfo)) {
          // If the previous VL/VTYPE was set by a VSETVLI that is not
          // otherwise used, try to merge the current VL/VTYPE into it.
          bool NeedInsertVSETVLI = true;
          if (PrevVSETVLIMI) {
            bool HasSameAVL =
                CurInfo.hasSameAVL(NewInfo) ||
                (NewInfo.hasAVLReg() && NewInfo.getAVLReg().isVirtual() &&
                 NewInfo.getAVLReg() == PrevVSETVLIMI->getOperand(0).getReg());
            // If these two VSETVLI have the same AVL and the same VLMAX,
            // we could merge these two VSETVLI.
            if (HasSameAVL &&
                CurInfo.getSEWLMULRatio() == NewInfo.getSEWLMULRatio()) {
              PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
              NeedInsertVSETVLI = false;
            }
            if (isScalarMoveInstr(MI) &&
                ((CurInfo.hasNonZeroAVL() && NewInfo.hasNonZeroAVL()) ||
                 (CurInfo.hasZeroAVL() && NewInfo.hasZeroAVL())) &&
                NewInfo.hasSameVLMAX(CurInfo)) {
              PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
              NeedInsertVSETVLI = false;
            }
          }
          if (NeedInsertVSETVLI)
            insertVSETVLI(MBB, MI, NewInfo, CurInfo);
          CurInfo = NewInfo;
          BBLocalInfo = NewInfo;
        }
      }
      PrevVSETVLIMI = nullptr;
    }

    // If this is something that updates VL/VTYPE that we don't know about,
    // set the state to unknown.
    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE)) {
      CurInfo = VSETVLIInfo::getUnknown();
      BBLocalInfo = VSETVLIInfo::getUnknown();
      PrevVSETVLIMI = nullptr;
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF)
    HaveVectorOp |= computeVLVTYPEChanges(MBB);

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (HaveVectorOp) {
    // Phase 2 - determine the exit VL/VTYPE from each block. We add all
    // blocks to the list here, but will also add any that need to be revisited
    // during Phase 2 processing.
    for (const MachineBasicBlock &MBB : MF) {
      WorkList.push(&MBB);
      BlockInfo[MBB.getNumber()].InQueue = true;
    }
    while (!WorkList.empty()) {
      const MachineBasicBlock &MBB = *WorkList.front();
      WorkList.pop();
      computeIncomingVLVTYPE(MBB);
    }

    // Phase 3 - add any vsetvli instructions needed in the block. Use the
    // Phase 2 information to avoid adding vsetvlis before the first vector
    // instruction in the block if the VL/VTYPE is satisfied by its
    // predecessors.
    for (MachineBasicBlock &MBB : MF)
      emitVSETVLIs(MBB);
  }

  BlockInfo.clear();

  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}