//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));

namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isScalarMoveInstr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::PseudoVMV_S_X_M1:
  case RISCV::PseudoVMV_S_X_M2:
  case RISCV::PseudoVMV_S_X_M4:
  case RISCV::PseudoVMV_S_X_M8:
  case RISCV::PseudoVMV_S_X_MF2:
  case RISCV::PseudoVMV_S_X_MF4:
  case RISCV::PseudoVMV_S_X_MF8:
  case RISCV::PseudoVFMV_S_F16_M1:
  case RISCV::PseudoVFMV_S_F16_M2:
  case RISCV::PseudoVFMV_S_F16_M4:
  case RISCV::PseudoVFMV_S_F16_M8:
  case RISCV::PseudoVFMV_S_F16_MF2:
  case RISCV::PseudoVFMV_S_F16_MF4:
  case RISCV::PseudoVFMV_S_F32_M1:
  case RISCV::PseudoVFMV_S_F32_M2:
  case RISCV::PseudoVFMV_S_F32_M4:
  case RISCV::PseudoVFMV_S_F32_M8:
  case RISCV::PseudoVFMV_S_F32_MF2:
  case RISCV::PseudoVFMV_S_F64_M1:
  case RISCV::PseudoVFMV_S_F64_M2:
  case RISCV::PseudoVFMV_S_F64_M4:
  case RISCV::PseudoVFMV_S_F64_M8:
    return true;
  }
}

/// Get the EEW for a load or store instruction.  Return None if MI is not
/// a load or store which ignores SEW.
static Optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return None;
  case RISCV::PseudoVLE8_V_M1:
  case RISCV::PseudoVLE8_V_M1_MASK:
  case RISCV::PseudoVLE8_V_M2:
  case RISCV::PseudoVLE8_V_M2_MASK:
  case RISCV::PseudoVLE8_V_M4:
  case RISCV::PseudoVLE8_V_M4_MASK:
  case RISCV::PseudoVLE8_V_M8:
  case RISCV::PseudoVLE8_V_M8_MASK:
  case RISCV::PseudoVLE8_V_MF2:
  case RISCV::PseudoVLE8_V_MF2_MASK:
  case RISCV::PseudoVLE8_V_MF4:
  case RISCV::PseudoVLE8_V_MF4_MASK:
  case RISCV::PseudoVLE8_V_MF8:
  case RISCV::PseudoVLE8_V_MF8_MASK:
  case RISCV::PseudoVLSE8_V_M1:
  case RISCV::PseudoVLSE8_V_M1_MASK:
  case RISCV::PseudoVLSE8_V_M2:
  case RISCV::PseudoVLSE8_V_M2_MASK:
  case RISCV::PseudoVLSE8_V_M4:
  case RISCV::PseudoVLSE8_V_M4_MASK:
  case RISCV::PseudoVLSE8_V_M8:
  case RISCV::PseudoVLSE8_V_M8_MASK:
  case RISCV::PseudoVLSE8_V_MF2:
  case RISCV::PseudoVLSE8_V_MF2_MASK:
  case RISCV::PseudoVLSE8_V_MF4:
  case RISCV::PseudoVLSE8_V_MF4_MASK:
  case RISCV::PseudoVLSE8_V_MF8:
  case RISCV::PseudoVLSE8_V_MF8_MASK:
  case RISCV::PseudoVSE8_V_M1:
  case RISCV::PseudoVSE8_V_M1_MASK:
  case RISCV::PseudoVSE8_V_M2:
  case RISCV::PseudoVSE8_V_M2_MASK:
  case RISCV::PseudoVSE8_V_M4:
  case RISCV::PseudoVSE8_V_M4_MASK:
  case RISCV::PseudoVSE8_V_M8:
  case RISCV::PseudoVSE8_V_M8_MASK:
  case RISCV::PseudoVSE8_V_MF2:
  case RISCV::PseudoVSE8_V_MF2_MASK:
  case RISCV::PseudoVSE8_V_MF4:
  case RISCV::PseudoVSE8_V_MF4_MASK:
  case RISCV::PseudoVSE8_V_MF8:
  case RISCV::PseudoVSE8_V_MF8_MASK:
  case RISCV::PseudoVSSE8_V_M1:
  case RISCV::PseudoVSSE8_V_M1_MASK:
  case RISCV::PseudoVSSE8_V_M2:
  case RISCV::PseudoVSSE8_V_M2_MASK:
  case RISCV::PseudoVSSE8_V_M4:
  case RISCV::PseudoVSSE8_V_M4_MASK:
  case RISCV::PseudoVSSE8_V_M8:
  case RISCV::PseudoVSSE8_V_M8_MASK:
  case RISCV::PseudoVSSE8_V_MF2:
  case RISCV::PseudoVSSE8_V_MF2_MASK:
  case RISCV::PseudoVSSE8_V_MF4:
  case RISCV::PseudoVSSE8_V_MF4_MASK:
  case RISCV::PseudoVSSE8_V_MF8:
  case RISCV::PseudoVSSE8_V_MF8_MASK:
    return 8;
  case RISCV::PseudoVLE16_V_M1:
  case RISCV::PseudoVLE16_V_M1_MASK:
  case RISCV::PseudoVLE16_V_M2:
  case RISCV::PseudoVLE16_V_M2_MASK:
  case RISCV::PseudoVLE16_V_M4:
  case RISCV::PseudoVLE16_V_M4_MASK:
  case RISCV::PseudoVLE16_V_M8:
  case RISCV::PseudoVLE16_V_M8_MASK:
  case RISCV::PseudoVLE16_V_MF2:
  case RISCV::PseudoVLE16_V_MF2_MASK:
  case RISCV::PseudoVLE16_V_MF4:
  case RISCV::PseudoVLE16_V_MF4_MASK:
  case RISCV::PseudoVLSE16_V_M1:
  case RISCV::PseudoVLSE16_V_M1_MASK:
  case RISCV::PseudoVLSE16_V_M2:
  case RISCV::PseudoVLSE16_V_M2_MASK:
  case RISCV::PseudoVLSE16_V_M4:
  case RISCV::PseudoVLSE16_V_M4_MASK:
  case RISCV::PseudoVLSE16_V_M8:
  case RISCV::PseudoVLSE16_V_M8_MASK:
  case RISCV::PseudoVLSE16_V_MF2:
  case RISCV::PseudoVLSE16_V_MF2_MASK:
  case RISCV::PseudoVLSE16_V_MF4:
  case RISCV::PseudoVLSE16_V_MF4_MASK:
  case RISCV::PseudoVSE16_V_M1:
  case RISCV::PseudoVSE16_V_M1_MASK:
  case RISCV::PseudoVSE16_V_M2:
  case RISCV::PseudoVSE16_V_M2_MASK:
  case RISCV::PseudoVSE16_V_M4:
  case RISCV::PseudoVSE16_V_M4_MASK:
  case RISCV::PseudoVSE16_V_M8:
  case RISCV::PseudoVSE16_V_M8_MASK:
  case RISCV::PseudoVSE16_V_MF2:
  case RISCV::PseudoVSE16_V_MF2_MASK:
  case RISCV::PseudoVSE16_V_MF4:
  case RISCV::PseudoVSE16_V_MF4_MASK:
  case RISCV::PseudoVSSE16_V_M1:
  case RISCV::PseudoVSSE16_V_M1_MASK:
  case RISCV::PseudoVSSE16_V_M2:
  case RISCV::PseudoVSSE16_V_M2_MASK:
  case RISCV::PseudoVSSE16_V_M4:
  case RISCV::PseudoVSSE16_V_M4_MASK:
  case RISCV::PseudoVSSE16_V_M8:
  case RISCV::PseudoVSSE16_V_M8_MASK:
  case RISCV::PseudoVSSE16_V_MF2:
  case RISCV::PseudoVSSE16_V_MF2_MASK:
  case RISCV::PseudoVSSE16_V_MF4:
  case RISCV::PseudoVSSE16_V_MF4_MASK:
    return 16;
  case RISCV::PseudoVLE32_V_M1:
  case RISCV::PseudoVLE32_V_M1_MASK:
  case RISCV::PseudoVLE32_V_M2:
  case RISCV::PseudoVLE32_V_M2_MASK:
  case RISCV::PseudoVLE32_V_M4:
  case RISCV::PseudoVLE32_V_M4_MASK:
  case RISCV::PseudoVLE32_V_M8:
  case RISCV::PseudoVLE32_V_M8_MASK:
  case RISCV::PseudoVLE32_V_MF2:
  case RISCV::PseudoVLE32_V_MF2_MASK:
  case RISCV::PseudoVLSE32_V_M1:
  case RISCV::PseudoVLSE32_V_M1_MASK:
  case RISCV::PseudoVLSE32_V_M2:
  case RISCV::PseudoVLSE32_V_M2_MASK:
  case RISCV::PseudoVLSE32_V_M4:
  case RISCV::PseudoVLSE32_V_M4_MASK:
  case RISCV::PseudoVLSE32_V_M8:
  case RISCV::PseudoVLSE32_V_M8_MASK:
  case RISCV::PseudoVLSE32_V_MF2:
  case RISCV::PseudoVLSE32_V_MF2_MASK:
  case RISCV::PseudoVSE32_V_M1:
  case RISCV::PseudoVSE32_V_M1_MASK:
  case RISCV::PseudoVSE32_V_M2:
  case RISCV::PseudoVSE32_V_M2_MASK:
  case RISCV::PseudoVSE32_V_M4:
  case RISCV::PseudoVSE32_V_M4_MASK:
  case RISCV::PseudoVSE32_V_M8:
  case RISCV::PseudoVSE32_V_M8_MASK:
  case RISCV::PseudoVSE32_V_MF2:
  case RISCV::PseudoVSE32_V_MF2_MASK:
  case RISCV::PseudoVSSE32_V_M1:
  case RISCV::PseudoVSSE32_V_M1_MASK:
  case RISCV::PseudoVSSE32_V_M2:
  case RISCV::PseudoVSSE32_V_M2_MASK:
  case RISCV::PseudoVSSE32_V_M4:
  case RISCV::PseudoVSSE32_V_M4_MASK:
  case RISCV::PseudoVSSE32_V_M8:
  case RISCV::PseudoVSSE32_V_M8_MASK:
  case RISCV::PseudoVSSE32_V_MF2:
  case RISCV::PseudoVSSE32_V_MF2_MASK:
    return 32;
  case RISCV::PseudoVLE64_V_M1:
  case RISCV::PseudoVLE64_V_M1_MASK:
  case RISCV::PseudoVLE64_V_M2:
  case RISCV::PseudoVLE64_V_M2_MASK:
  case RISCV::PseudoVLE64_V_M4:
  case RISCV::PseudoVLE64_V_M4_MASK:
  case RISCV::PseudoVLE64_V_M8:
  case RISCV::PseudoVLE64_V_M8_MASK:
  case RISCV::PseudoVLSE64_V_M1:
  case RISCV::PseudoVLSE64_V_M1_MASK:
  case RISCV::PseudoVLSE64_V_M2:
  case RISCV::PseudoVLSE64_V_M2_MASK:
  case RISCV::PseudoVLSE64_V_M4:
  case RISCV::PseudoVLSE64_V_M4_MASK:
  case RISCV::PseudoVLSE64_V_M8:
  case RISCV::PseudoVLSE64_V_M8_MASK:
  case RISCV::PseudoVSE64_V_M1:
  case RISCV::PseudoVSE64_V_M1_MASK:
  case RISCV::PseudoVSE64_V_M2:
  case RISCV::PseudoVSE64_V_M2_MASK:
  case RISCV::PseudoVSE64_V_M4:
  case RISCV::PseudoVSE64_V_M4_MASK:
  case RISCV::PseudoVSE64_V_M8:
  case RISCV::PseudoVSE64_V_M8_MASK:
  case RISCV::PseudoVSSE64_V_M1:
  case RISCV::PseudoVSSE64_V_M1_MASK:
  case RISCV::PseudoVSSE64_V_M2:
  case RISCV::PseudoVSSE64_V_M2_MASK:
  case RISCV::PseudoVSSE64_V_M4:
  case RISCV::PseudoVSSE64_V_M4_MASK:
  case RISCV::PseudoVSSE64_V_M8:
  case RISCV::PseudoVSSE64_V_M8_MASK:
    return 64;
  }
}

/// Return true if this is an operation on mask registers.  Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (RISCVII::hasSEWOp(MI.getDesc().TSFlags)) {
    const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
    // A Log2SEW of 0 is an operation on mask registers only.
    return Log2SEW == 0;
  }
  return false;
}

static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);

  // Convert LMul to a fixed point value with 3 fractional bits.
  LMul = Fractional ? (8 / LMul) : (LMul * 8);

  assert(SEW >= 8 && "Unexpected SEW value");
  return (SEW * 8) / LMul;
}
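// Worked example for the ratio above: SEW=16 with LMUL=1/2 decodes to a
// fixed point LMul of 8/2 = 4, giving a ratio of (16 * 8) / 4 = 32.  SEW=32
// with LMUL=1 gives (32 * 8) / 8 = 32 as well.  Since VLMAX = VLEN * LMUL /
// SEW, an equal ratio means an equal VLMAX, which is why the pass can treat
// such configurations as interchangeable when only the ratio is demanded.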

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  bool VL = false;
  bool SEW = false;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = true;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }
};

/// Return true if the two values of the VTYPE register provided are
/// indistinguishable from the perspective of an instruction (or set of
/// instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t VType1,
                                uint64_t VType2,
                                const DemandedFields &Used) {
  if (Used.SEW &&
      RISCVVType::getSEW(VType1) != RISCVVType::getSEW(VType2))
    return false;

  if (Used.LMUL &&
      RISCVVType::getVLMUL(VType1) != RISCVVType::getVLMUL(VType2))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = getSEWLMULRatio(RISCVVType::getSEW(VType1),
                                  RISCVVType::getVLMUL(VType1));
    auto Ratio2 = getSEWLMULRatio(RISCVVType::getSEW(VType2),
                                  RISCVVType::getVLMUL(VType2));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy &&
      RISCVVType::isTailAgnostic(VType1) != RISCVVType::isTailAgnostic(VType2))
    return false;
  if (Used.MaskPolicy &&
      RISCVVType::isMaskAgnostic(VType1) != RISCVVType::isMaskAgnostic(VType2))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
static DemandedFields getDemanded(const MachineInstr &MI) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms.  The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
    Res.VL = true;
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.VL = true;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode.  This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  return Res;
}
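// Illustrative example: PseudoVLE32_V_M1 encodes EEW=32 in its opcode, so
// per the rules above it does not demand SEW or LMUL individually, only
// their ratio (plus VL and the policy bits).  A prior vtype of e16/mf2
// (ratio 32) therefore serves it as well as e32/m1 (also ratio 32),
// assuming the demanded policy bits match.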

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }

  bool hasNonZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg())
      return getAVLReg() == RISCV::X0;
    return false;
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare AVL in unknown state");
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameSEW(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return SEW == Other.SEW;
  }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return ::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPEs of these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasSamePolicy(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return TailAgnostic == Other.TailAgnostic &&
           MaskAgnostic == Other.MaskAgnostic;
  }

  bool hasCompatibleVTYPE(const MachineInstr &MI,
                          const VSETVLIInfo &Require) const {
    const DemandedFields Used = getDemanded(MI);
    return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this.  MI is the instruction whose requirements we're considering.
  bool isCompatible(const MachineInstr &MI, const VSETVLIInfo &Require) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    // If the instruction doesn't need an AVLReg and the SEW matches, consider
    // it compatible.
    if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
      if (SEW == Require.SEW)
        return true;

    return hasSameAVL(Require) && hasCompatibleVTYPE(MI, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}
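// For example, `vsetvli x0, x0, e32, m2, ta, ma' rewrites VTYPE while
// keeping the current VL.  Per the vector spec this form is only usable
// when the new SEW/LMUL ratio preserves VLMAX, which is why the emitters
// below guard it with hasSameVLMAX checks.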

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  // If the instruction has a policy argument, use the argument.
  // If there is no policy argument, default to tail agnostic unless the
  // destination is tied to a source (and that source is not undef), in which
  // case the user has some control over the policy values.
  bool TailAgnostic = true;
  bool UsesMaskPolicy = RISCVII::usesMaskPolicy(TSFlags);
  // FIXME: Could we look at the instructions above or below to choose a
  // matching mask policy and thereby reduce the number of vsetvli
  // instructions? The default mask policy is agnostic if the instruction
  // uses the mask policy, and undisturbed otherwise. Because most mask
  // operations are mask undisturbed, we could possibly reduce the vsetvlis
  // between masked and unmasked instruction sequences.
  bool MaskAgnostic = UsesMaskPolicy;
  unsigned UseOpIdx;
  if (RISCVII::hasVecPolicyOp(TSFlags)) {
    const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
    uint64_t Policy = Op.getImm();
    assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
           "Invalid Policy Value");
    // Although in some cases a mismatched passthru/maskedoff and policy value
    // does not make sense (e.g. a tied operand that is IMPLICIT_DEF with a
    // non-TAMA policy, or a tied operand that is not IMPLICIT_DEF with a TAMA
    // policy), the user has set the policy value explicitly, so the compiler
    // does not change it.
    TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
    MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
  } else if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
    TailAgnostic = false;
    if (UsesMaskPolicy)
      MaskAgnostic = false;
    // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
    if (UseMI && UseMI->isImplicitDef()) {
      TailAgnostic = true;
      if (UsesMaskPolicy)
        MaskAgnostic = true;
    }
    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel)
        InstrInfo.setAVLReg(RISCV::X0);
      else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }
#ifndef NDEBUG
  if (Optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {

  // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
  // VLMAX.
  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
      Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill)
        .addImm(Info.encodeVTYPE())
        .addReg(RISCV::VL, RegState::Implicit);
    return;
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 0 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(0)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}
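// A sketch of what the above emits, by case: `vsetvli x0, x0, <vtype>' when
// AVL and VLMAX are unchanged; `vsetivli x0, <imm>, <vtype>' for an
// immediate AVL; `vsetivli x0, 0, <vtype>' when no AVL operand exists and
// VLMAX may change; otherwise `vsetvli x0, <avlreg>, <vtype>' (with a
// scratch GPR destination when the AVL register is x0, i.e. VLMAX).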

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));

  if (CurInfo.isCompatible(MI, Require))
    return false;

  if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
    return true;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  // VL=0 is uninteresting (as it should have been deleted already), so it is
  // compatible if we can prove both are non-zero.  Additionally, if writing
  // to an implicit_def operand, we don't need to preserve any other bits and
  // are thus compatible with any larger etype, and can disregard policy bits.
  if (isScalarMoveInstr(MI) &&
      CurInfo.hasNonZeroAVL() && Require.hasNonZeroAVL()) {
    auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
    if (VRegDef && VRegDef->isImplicitDef() &&
        CurInfo.getSEW() >= Require.getSEW())
      return false;
    if (CurInfo.hasSameSEW(Require) && CurInfo.hasSamePolicy(Require))
      return false;
  }

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(MI, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}

// Given an incoming state reaching MI, modifies that state so that it is minimally
// compatible with MI.  The resulting state is guaranteed to be semantically legal
// for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  Info = NewInfo;

  if (!RISCVII::hasVLOp(TSFlags))
    return;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
  // VL > 0. We can discard the user requested AVL and just use the last
  // one if we can prove both are non-zero.  This removes a vsetvli entirely
  // if the types match or allows use of a cheaper AVL-preserving variant
  // if VLMAX doesn't change.  If VLMAX might change, we couldn't use
  // the 'vsetvli x0, x0, vtype' variant, so we avoid the transform to
  // prevent extending the live range of an AVL register operand.
  // TODO: We can probably relax this for immediates.
  if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
      PrevInfo.hasNonZeroAVL() && Info.hasNonZeroAVL() &&
      Info.hasSameVLMAX(PrevInfo)) {
    if (PrevInfo.hasAVLImm())
      Info.setAVLImm(PrevInfo.getAVLImm());
    else
      Info.setAVLReg(PrevInfo.getAVLReg());
    return;
  }

  // Two cases involving an AVL resulting from a previous vsetvli.
  // 1) If the AVL is the result of a previous vsetvli which has the
  //    same AVL and VLMAX as our current state, we can reuse the AVL
  //    from the current state for the new one.  This allows us to
  //    generate 'vsetvli x0, x0, vtype' or possibly skip the transition
  //    entirely.
  // 2) If the AVL is defined by a vsetvli with the same VLMAX, we can
  //    replace the AVL operand with the AVL of the defining vsetvli.
  //    We avoid general register AVLs to avoid extending live ranges
  //    without being sure we can kill the original source reg entirely.
  if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
    return;
  MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;

  VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
  // case 1
  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
      DefInfo.hasSameAVL(PrevInfo) &&
      DefInfo.hasSameVLMAX(PrevInfo)) {
    if (PrevInfo.hasAVLImm())
      Info.setAVLImm(PrevInfo.getAVLImm());
    else
      Info.setAVLReg(PrevInfo.getAVLReg());
    return;
  }
  // case 2
  if (DefInfo.hasSameVLMAX(Info) &&
      (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
    if (DefInfo.hasAVLImm())
      Info.setAVLImm(DefInfo.getAVLImm());
    else
      Info.setAVLReg(DefInfo.getAVLReg());
    return;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to the vl-output of the fault first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
      MI.modifiesRegister(RISCV::VTYPE))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
  bool HadVectorOp = false;

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
  BBInfo.Change = BBInfo.Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(BBInfo.Change, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(BBInfo.Change, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  VSETVLIInfo InInfo;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun the block.
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based
  // on the input state.  To cache, we'd have to add logic for finding
  // never-compatible state changes.
  computeVLVTYPEChanges(MBB);
  VSETVLIInfo TmpStatus = BBInfo.Change;

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue)
      WorkList.push(S);
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}
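// Shape of the pattern the above recognizes (a sketch in MIR-like form):
//
//   bb.pred:
//     %a = PseudoVSETVLI %x, <vtype>    ; last VL/VTYPE change in bb.pred
//     ...
//   bb.cur:
//     %avl = PHI [ %a, %bb.pred ], ...
//     <vector op using %avl and the same VTYPE>
//
// Every incoming value is the VL output of a vsetvli matching the
// predecessor's exit state, so re-executing a vsetvli here would be a no-op.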

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct.  Note that
        // we *do* need to model the state as if it changed: while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE.  Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}

/// Return true if the VL value configured must be equal to the requested one.
static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
  if (!Info.hasAVLImm())
    // VLMAX is always the same value.
    // TODO: Could extend to other registers by looking at the associated vreg
    // def placement.
    return RISCV::X0 == Info.getAVLReg();

  unsigned AVL = Info.getAVLImm();
  unsigned SEW = Info.getSEW();
  unsigned AVLInBits = AVL * SEW;

  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());

  if (Fractional)
    return ST.getRealMinVLen() / LMul >= AVLInBits;
  return ST.getRealMinVLen() * LMul >= AVLInBits;
}
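// Worked example: on a Zvl128b subtarget (RealMinVLen = 128), AVL=2 with
// SEW=32 and LMUL=1/2 gives AVLInBits = 64 and 128/2 = 64 >= 64, so
// VL == AVL is guaranteed.  With AVL=4 instead, AVLInBits = 128 > 64, so a
// minimum-VLEN implementation could produce VL < AVL and the result is not
// fixed.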

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors.  Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  const MachineFunction &MF = *MBB.getParent();
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();

  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If VL can be less than AVL, then we can't reduce the frequency of
  // execution.
  if (!hasFixedResult(AvailableInfo, ST))
    return;

  // Does it actually let us remove an implicit transition in MBB?
  bool Found = false;
  for (auto &MI : MBB) {
    if (isVectorConfigInstr(MI))
      return;

    const uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI))
        return;
      Found = true;
      break;
    }
  }
  if (!Found)
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE.  Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldInfo);
}

static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VL |= B.VL;
  A.SEW |= B.SEW;
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}

// Return true if we can mutate PrevMI's VTYPE to match MI's
// without changing any of the fields which have been used.
// TODO: Restructure code to allow code reuse between this and isCompatible
// above.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used) {
  // TODO: Extend this to handle cases where VL does change, but VL
  // has not been used.  (e.g. over a vmv.x.s)
  if (!isVLPreservingConfig(MI))
    // Note: `vsetvli x0, x0, vtype' is the canonical instruction
    // for this case.  If you find yourself wanting to add other forms
    // to this "unused VTYPE" case, we're probably missing a
    // canonicalization earlier.
    return false;

  if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
    return false;

  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
  MachineInstr *PrevMI = nullptr;
  DemandedFields Used;
  SmallVector<MachineInstr*> ToDelete;
  for (MachineInstr &MI : MBB) {
    // Note: Must be *before* vsetvli handling to account for config cases
    // which only change some subfields.
    doUnion(Used, getDemanded(MI));

    if (!isVectorConfigInstr(MI))
      continue;

    if (PrevMI) {
      if (!Used.VL && !Used.usedVTYPE()) {
        ToDelete.push_back(PrevMI);
        // fallthrough
      } else if (canMutatePriorConfig(*PrevMI, MI, Used)) {
        PrevMI->getOperand(2).setImm(MI.getOperand(2).getImm());
        ToDelete.push_back(&MI);
        // Leave PrevMI unchanged
        continue;
      }
    }
    PrevMI = &MI;
    Used = getDemanded(MI);
    Register VRegDef = MI.getOperand(0).getReg();
    if (VRegDef != RISCV::X0 &&
        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
      Used.VL = true;
  }

  for (auto *MI : ToDelete)
    MI->eraseFromParent();
}
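// Example of the mutation case above (a sketch): given
//   vsetvli x0, a0, e32, m1, ta, ma
//   vsetvli x0, x0, e16, mf2, ta, ma   ; VL-preserving, same SEW/LMUL ratio
// with no intervening instruction demanding the changed fields, the first
// vsetvli's VTYPE immediate is rewritten to e16/mf2 and the second is
// deleted.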

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    HaveVectorOp |= computeVLVTYPEChanges(MBB);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = BBInfo.Change;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block local
  // DSE and peephole based demanded fields based transforms.  Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow.  We can't have both
  // demanded fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    doLocalPostpass(MBB);

  // Once we're fully done rewriting all the instructions, do a final pass
  // through to check for VSETVLIs which write to an unused destination.
  // For the non X0, X0 variant, we can replace the destination register
  // with X0 to reduce register pressure.  This is really a generic
  // optimization which can be applied to any dead def (TODO: generalize).
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
          MI.getOpcode() == RISCV::PseudoVSETIVLI) {
        Register VRegDef = MI.getOperand(0).getReg();
        if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
          MI.getOperand(0).setReg(RISCV::X0);
      }
    }
  }

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace uses of the vl
  // output of the VLEFF/VLSEGFF with it.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}