; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+v \
; RUN:    -target-abi=lp64d -verify-machineinstrs -O2 < %s | FileCheck %s

; The following tests check that the VSETVLI insertion pass avoids inserting
; unneeded vsetvlis across basic blocks.
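;
; A note on operand encoding (the standard RVV intrinsic convention, restated
; here for readability): in @llvm.riscv.vsetvli(%avl, SEW, LMUL) the SEW
; operand is log2(SEW)-3 (0/1/2/3 = e8/e16/e32/e64) and the LMUL operand is
; the vlmul encoding (0/1/2/3 = m1/m2/m4/m8, 5/6/7 = mf8/mf4/mf2), so e.g.
; (i64 %avl, i64 3, i64 0) requests e64, m1.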

declare i64 @llvm.riscv.vsetvli(i64, i64, i64)

declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)
declare <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, i64, i64)

declare <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)

declare <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)

declare <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double>, double, i64)
declare <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float>, float, i64)

declare void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double>, ptr nocapture, i64)
declare void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float>, ptr nocapture, i64)

define <vscale x 1 x double> @test1(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    beqz a1, .LBB0_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB0_2: # %if.else
; CHECK-NEXT:    vfsub.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else:                                          ; preds = %entry
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  ret <vscale x 1 x double> %c.0
}

@scratch = global i8 0, align 16

define <vscale x 1 x double> @test2(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    beqz a1, .LBB1_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    vfadd.vv v9, v8, v9
; CHECK-NEXT:    vfmul.vv v8, v9, v8
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB1_2: # %if.else
; CHECK-NEXT:    vfsub.vv v9, v8, v9
; CHECK-NEXT:    vfmul.vv v8, v9, v8
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else:                                          ; preds = %entry
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  %3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %0)
  ret <vscale x 1 x double> %3
}

define <vscale x 1 x double> @test3(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    beqz a1, .LBB2_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v8, v9
; CHECK-NEXT:    vfmul.vv v8, v9, v8
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB2_2: # %if.else
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfsub.vv v9, v8, v9
; CHECK-NEXT:    vfmul.vv v8, v9, v8
; CHECK-NEXT:    ret
entry:
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else:                                          ; preds = %entry
  %2 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %3 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %2)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %vl.0 = phi i64 [ %0, %if.then ], [ %2, %if.else ]
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %3, %if.else ]
  %4 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %vl.0)
  ret <vscale x 1 x double> %4
}

define <vscale x 1 x double> @test4(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %l, <vscale x 1 x double> %r) nounwind {
; CHECK-LABEL: test4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    beqz a1, .LBB3_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    lui a1, %hi(.LCPI3_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI3_0)(a1)
; CHECK-NEXT:    lui a1, %hi(.LCPI3_1)
; CHECK-NEXT:    fld fa4, %lo(.LCPI3_1)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.v.f v10, fa5
; CHECK-NEXT:    vfmv.v.f v11, fa4
; CHECK-NEXT:    vfadd.vv v10, v10, v11
; CHECK-NEXT:    lui a1, %hi(scratch)
; CHECK-NEXT:    addi a1, a1, %lo(scratch)
; CHECK-NEXT:    vse64.v v10, (a1)
; CHECK-NEXT:    j .LBB3_3
; CHECK-NEXT:  .LBB3_2: # %if.else
; CHECK-NEXT:    lui a1, 260096
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v10, a1
; CHECK-NEXT:    lui a1, 262144
; CHECK-NEXT:    vmv.v.x v11, a1
; CHECK-NEXT:    vfadd.vv v10, v10, v11
; CHECK-NEXT:    lui a1, %hi(scratch)
; CHECK-NEXT:    addi a1, a1, %lo(scratch)
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:  .LBB3_3: # %if.end
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmul.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %0 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 1.000000e+00, i64 %avl)
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 2.000000e+00, i64 %avl)
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %0, <vscale x 1 x double> %1, i64 7, i64 %avl)
  %3 = bitcast ptr @scratch to ptr
  tail call void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double> %2, ptr %3, i64 %avl)
  br label %if.end

if.else:                                          ; preds = %entry
  %4 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 1.000000e+00, i64 %avl)
  %5 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 2.000000e+00, i64 %avl)
  %6 = tail call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %4, <vscale x 2 x float> %5, i64 7, i64 %avl)
  %7 = bitcast ptr @scratch to ptr
  tail call void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float> %6, ptr %7, i64 %avl)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %8 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %l, <vscale x 1 x double> %r, i64 7, i64 %avl)
  ret <vscale x 1 x double> %8
}

define <vscale x 1 x double> @test5(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a2, a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    bnez a2, .LBB4_3
; CHECK-NEXT:  # %bb.1: # %if.else
; CHECK-NEXT:    vfsub.vv v9, v8, v9
; CHECK-NEXT:    andi a1, a1, 2
; CHECK-NEXT:    beqz a1, .LBB4_4
; CHECK-NEXT:  .LBB4_2: # %if.then4
; CHECK-NEXT:    vfmul.vv v8, v9, v8
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB4_3: # %if.then
; CHECK-NEXT:    vfadd.vv v9, v8, v9
; CHECK-NEXT:    andi a1, a1, 2
; CHECK-NEXT:    bnez a1, .LBB4_2
; CHECK-NEXT:  .LBB4_4: # %if.else5
; CHECK-NEXT:    vfmul.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %conv = zext i8 %cond to i32
  %and = and i32 %conv, 1
  %tobool = icmp eq i32 %and, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else:                                          ; preds = %entry
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  %and2 = and i32 %conv, 2
  %tobool3 = icmp eq i32 %and2, 0
  br i1 %tobool3, label %if.else5, label %if.then4

if.then4:                                         ; preds = %if.end
  %3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %0)
  br label %if.end6

if.else5:                                         ; preds = %if.end
  %4 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %c.0, i64 7, i64 %0)
  br label %if.end6

if.end6:                                          ; preds = %if.else5, %if.then4
  %c.1 = phi <vscale x 1 x double> [ %3, %if.then4 ], [ %4, %if.else5 ]
  ret <vscale x 1 x double> %c.1
}

; FIXME: The explicit vsetvli in if.then4 could be removed as it is redundant
; with the one in the entry, but we lack the ability to remove explicit
; vsetvli instructions.
define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a2, a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    bnez a2, .LBB5_3
; CHECK-NEXT:  # %bb.1: # %if.else
; CHECK-NEXT:    vfsub.vv v8, v8, v9
; CHECK-NEXT:    andi a1, a1, 2
; CHECK-NEXT:    beqz a1, .LBB5_4
; CHECK-NEXT:  .LBB5_2: # %if.then4
; CHECK-NEXT:    lui a1, %hi(.LCPI5_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI5_0)(a1)
; CHECK-NEXT:    lui a1, %hi(.LCPI5_1)
; CHECK-NEXT:    fld fa4, %lo(.LCPI5_1)(a1)
; CHECK-NEXT:    vfmv.v.f v9, fa5
; CHECK-NEXT:    vfmv.v.f v10, fa4
; CHECK-NEXT:    vfadd.vv v9, v9, v10
; CHECK-NEXT:    lui a1, %hi(scratch)
; CHECK-NEXT:    addi a1, a1, %lo(scratch)
; CHECK-NEXT:    vse64.v v9, (a1)
; CHECK-NEXT:    j .LBB5_5
; CHECK-NEXT:  .LBB5_3: # %if.then
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    andi a1, a1, 2
; CHECK-NEXT:    bnez a1, .LBB5_2
; CHECK-NEXT:  .LBB5_4: # %if.else5
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    lui a1, 260096
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    lui a1, 262144
; CHECK-NEXT:    vmv.v.x v10, a1
; CHECK-NEXT:    vfadd.vv v9, v9, v10
; CHECK-NEXT:    lui a1, %hi(scratch)
; CHECK-NEXT:    addi a1, a1, %lo(scratch)
; CHECK-NEXT:    vse32.v v9, (a1)
; CHECK-NEXT:  .LBB5_5: # %if.end10
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmul.vv v8, v8, v8
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %conv = zext i8 %cond to i32
  %and = and i32 %conv, 1
  %tobool = icmp eq i32 %and, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else:                                          ; preds = %entry
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  %and2 = and i32 %conv, 2
  %tobool3 = icmp eq i32 %and2, 0
  br i1 %tobool3, label %if.else5, label %if.then4

if.then4:                                         ; preds = %if.end
  %3 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %4 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 1.000000e+00, i64 %3)
  %5 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 2.000000e+00, i64 %3)
  %6 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %4, <vscale x 1 x double> %5, i64 7, i64 %3)
  %7 = bitcast ptr @scratch to ptr
  tail call void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double> %6, ptr %7, i64 %3)
  br label %if.end10

if.else5:                                         ; preds = %if.end
  %8 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 0)
  %9 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 1.000000e+00, i64 %8)
  %10 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 2.000000e+00, i64 %8)
  %11 = tail call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %9, <vscale x 2 x float> %10, i64 7, i64 %8)
  %12 = bitcast ptr @scratch to ptr
  tail call void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float> %11, ptr %12, i64 %8)
  br label %if.end10

if.end10:                                         ; preds = %if.else5, %if.then4
  %13 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %c.0, i64 7, i64 %0)
  ret <vscale x 1 x double> %13
}

declare void @foo()

; Similar to test1, but contains a call to @foo to act as a barrier to
; analyzing VL/VTYPE.
define <vscale x 1 x double> @test8(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    beqz a1, .LBB6_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB6_2: # %if.else
; CHECK-NEXT:    addi sp, sp, -32
; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 1
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    mv s0, a0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    add a0, a0, sp
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs1r.v v9, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    call foo
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    add a0, a0, sp
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, s0, e64, m1, ta, ma
; CHECK-NEXT:    vfsub.vv v8, v9, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 32
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else:                                          ; preds = %entry
  call void @foo()
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  ret <vscale x 1 x double> %c.0
}

; Similar to test2, but contains a call to @foo to act as a barrier to
; analyzing VL/VTYPE.
define <vscale x 1 x double> @test9(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test9:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi sp, sp, -32
; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 1
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    mv s0, a0
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    beqz a1, .LBB7_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    vfadd.vv v9, v8, v9
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs1r.v v9, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    add a0, a0, sp
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    call foo
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    add a0, a0, sp
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    j .LBB7_3
; CHECK-NEXT:  .LBB7_2: # %if.else
; CHECK-NEXT:    vfsub.vv v9, v8, v9
; CHECK-NEXT:  .LBB7_3: # %if.end
; CHECK-NEXT:    vsetvli zero, s0, e64, m1, ta, ma
; CHECK-NEXT:    vfmul.vv v8, v9, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 32
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  call void @foo()
  br label %if.end

if.else:                                          ; preds = %entry
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  %3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %0)
  ret <vscale x 1 x double> %3
}

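; A stripmined saxpy loop. The vsetvli in the loop latch computes the VL for
; the next iteration, so the top of the loop body should need no additional
; vsetvli before the loads; only the tail-undisturbed policy for the vfmacc
; forces a transition.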
define void @saxpy_vec(i64 %n, float %a, ptr nocapture readonly %x, ptr nocapture %y) {
; CHECK-LABEL: saxpy_vec:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a3, a0, e32, m8, ta, ma
; CHECK-NEXT:    beqz a3, .LBB8_2
; CHECK-NEXT:  .LBB8_1: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    vle32.v v16, (a2)
; CHECK-NEXT:    slli a4, a3, 2
; CHECK-NEXT:    sub a0, a0, a3
; CHECK-NEXT:    add a1, a1, a4
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, tu, ma
; CHECK-NEXT:    vfmacc.vf v16, fa0, v8
; CHECK-NEXT:    vse32.v v16, (a2)
; CHECK-NEXT:    vsetvli a3, a0, e32, m8, ta, ma
; CHECK-NEXT:    add a2, a2, a4
; CHECK-NEXT:    bnez a3, .LBB8_1
; CHECK-NEXT:  .LBB8_2: # %for.end
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 3)
  %cmp.not13 = icmp eq i64 %0, 0
  br i1 %cmp.not13, label %for.end, label %for.body

for.body:                                         ; preds = %for.body, %entry
  %1 = phi i64 [ %7, %for.body ], [ %0, %entry ]
  %n.addr.016 = phi i64 [ %sub, %for.body ], [ %n, %entry ]
  %x.addr.015 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
  %y.addr.014 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
  %2 = bitcast ptr %x.addr.015 to ptr
  %3 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, ptr %2, i64 %1)
  %add.ptr = getelementptr inbounds float, ptr %x.addr.015, i64 %1
  %4 = bitcast ptr %y.addr.014 to ptr
  %5 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, ptr %4, i64 %1)
  %6 = tail call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float> %5, float %a, <vscale x 16 x float> %3, i64 7, i64 %1, i64 0)
  tail call void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float> %6, ptr %4, i64 %1)
  %add.ptr1 = getelementptr inbounds float, ptr %y.addr.014, i64 %1
  %sub = sub i64 %n.addr.016, %1
  %7 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub, i64 2, i64 3)
  %cmp.not = icmp eq i64 %7, 0
  br i1 %cmp.not, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}

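; Same as saxpy_vec, but the latch vsetvli requests e16, m4. That vtype has
; the same SEW/LMUL ratio (and therefore the same VLMAX and resulting VL) as
; e32, m8, so only its VL output is demanded; the e32, m8 vtype then has to
; be re-established at the top of the loop body.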
define void @saxpy_vec_demanded_fields(i64 %n, float %a, ptr nocapture readonly %x, ptr nocapture %y) {
; CHECK-LABEL: saxpy_vec_demanded_fields:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a3, a0, e32, m8, ta, ma
; CHECK-NEXT:    beqz a3, .LBB9_2
; CHECK-NEXT:  .LBB9_1: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    vle32.v v16, (a2)
; CHECK-NEXT:    slli a4, a3, 2
; CHECK-NEXT:    sub a0, a0, a3
; CHECK-NEXT:    add a1, a1, a4
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, tu, ma
; CHECK-NEXT:    vfmacc.vf v16, fa0, v8
; CHECK-NEXT:    vse32.v v16, (a2)
; CHECK-NEXT:    vsetvli a3, a0, e16, m4, ta, ma
; CHECK-NEXT:    add a2, a2, a4
; CHECK-NEXT:    bnez a3, .LBB9_1
; CHECK-NEXT:  .LBB9_2: # %for.end
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 3)
  %cmp.not13 = icmp eq i64 %0, 0
  br i1 %cmp.not13, label %for.end, label %for.body

for.body:                                         ; preds = %for.body, %entry
  %1 = phi i64 [ %7, %for.body ], [ %0, %entry ]
  %n.addr.016 = phi i64 [ %sub, %for.body ], [ %n, %entry ]
  %x.addr.015 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
  %y.addr.014 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
  %2 = bitcast ptr %x.addr.015 to ptr
  %3 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, ptr %2, i64 %1)
  %add.ptr = getelementptr inbounds float, ptr %x.addr.015, i64 %1
  %4 = bitcast ptr %y.addr.014 to ptr
  %5 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, ptr %4, i64 %1)
  %6 = tail call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float> %5, float %a, <vscale x 16 x float> %3, i64 7, i64 %1, i64 0)
  tail call void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float> %6, ptr %4, i64 %1)
  %add.ptr1 = getelementptr inbounds float, ptr %y.addr.014, i64 %1
  %sub = sub i64 %n.addr.016, %1
  %7 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub, i64 1, i64 2)
  %cmp.not = icmp eq i64 %7, 0
  br i1 %cmp.not, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}

declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)
declare <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float>, ptr nocapture, i64)
declare <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float>, float, <vscale x 16 x float>, i64, i64, i64)
declare void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float>, ptr nocapture, i64)

; We need a vsetvli in the last block because the predecessors have different
; VTYPEs. The AVL is the same and the SEW/LMUL ratio implies the same VLMAX so
; we don't need to read AVL and can keep VL unchanged.
define <vscale x 2 x i32> @test_vsetvli_x0_x0(ptr %x, ptr %y, <vscale x 2 x i32> %z, i64 %vl, i1 %cond) nounwind {
; CHECK-LABEL: test_vsetvli_x0_x0:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    andi a3, a3, 1
; CHECK-NEXT:    beqz a3, .LBB10_2
; CHECK-NEXT:  # %bb.1: # %if
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vwcvt.x.x.v v8, v10
; CHECK-NEXT:  .LBB10_2: # %if.end
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, ptr %x, i64 %vl)
  br i1 %cond, label %if, label %if.end

if:
  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %y, i64 %vl)
  %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i16> %b, i16 0, i64 %vl)
  br label %if.end

if.end:
  %d = phi <vscale x 2 x i32> [ %z, %entry ], [ %c, %if ]
  %e = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %d, i64 %vl)
  ret <vscale x 2 x i32> %e
}
declare <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>, ptr, i64)
declare <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>, ptr, i64)
declare <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i16>, i16, i64)
declare <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, i64)

; We can use an X0, X0 vsetvli in if2 and if2.end. The merge point at if.end
; will see two different vtypes with the same SEW/LMUL ratio. At if2.end we
; will only know the SEW/LMUL ratio for the if.end predecessor and the full
; vtype for the if2 predecessor. This makes sure we can merge a predecessor
; with only a known SEW/LMUL ratio with a predecessor whose vtype is fully
; known.
define <vscale x 2 x i32> @test_vsetvli_x0_x0_2(ptr %x, ptr %y, ptr %z, i64 %vl, i1 %cond, i1 %cond2, <vscale x 2 x i32> %w) nounwind {
; CHECK-LABEL: test_vsetvli_x0_x0_2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a3, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    andi a4, a4, 1
; CHECK-NEXT:    beqz a4, .LBB11_2
; CHECK-NEXT:  # %bb.1: # %if
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vwadd.wv v9, v9, v10
; CHECK-NEXT:  .LBB11_2: # %if.end
; CHECK-NEXT:    andi a5, a5, 1
; CHECK-NEXT:    beqz a5, .LBB11_4
; CHECK-NEXT:  # %bb.3: # %if2
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v10, (a2)
; CHECK-NEXT:    vwadd.wv v9, v9, v10
; CHECK-NEXT:  .LBB11_4: # %if2.end
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, ptr %x, i64 %vl)
  br i1 %cond, label %if, label %if.end

if:
  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %y, i64 %vl)
  %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i16> %b, i64 %vl)
  br label %if.end

if.end:
  %d = phi <vscale x 2 x i32> [ %a, %entry ], [ %c, %if ]
  br i1 %cond2, label %if2, label %if2.end

if2:
  %e = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %z, i64 %vl)
  %f = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> undef, <vscale x 2 x i32> %d, <vscale x 2 x i16> %e, i64 %vl)
  br label %if2.end

if2.end:
  %g = phi <vscale x 2 x i32> [ %d, %if.end ], [ %f, %if2 ]
  %h = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %g, <vscale x 2 x i32> %w, i64 %vl)
  ret <vscale x 2 x i32> %h
}
declare <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i16>, i64)

; We should only need 1 vsetvli for this code.
define void @vlmax(i64 %N, ptr %c, ptr %a, ptr %b) {
; CHECK-LABEL: vlmax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    blez a0, .LBB12_3
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    li a4, 0
; CHECK-NEXT:    vsetvli a6, zero, e64, m1, ta, ma
; CHECK-NEXT:    slli a5, a6, 3
; CHECK-NEXT:  .LBB12_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle64.v v8, (a2)
; CHECK-NEXT:    vle64.v v9, (a3)
; CHECK-NEXT:    add a4, a4, a6
; CHECK-NEXT:    add a3, a3, a5
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    add a1, a1, a5
; CHECK-NEXT:    add a2, a2, a5
; CHECK-NEXT:    blt a4, a0, .LBB12_2
; CHECK-NEXT:  .LBB12_3: # %for.end
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0)
  %cmp13 = icmp sgt i64 %N, 0
  br i1 %cmp13, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds double, ptr %a, i64 %i.014
  %1 = bitcast ptr %arrayidx to ptr
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vle.nxv1f64.i64(<vscale x 1 x double> undef, ptr %1, i64 %0)
  %arrayidx1 = getelementptr inbounds double, ptr %b, i64 %i.014
  %3 = bitcast ptr %arrayidx1 to ptr
  %4 = tail call <vscale x 1 x double> @llvm.riscv.vle.nxv1f64.i64(<vscale x 1 x double> undef, ptr %3, i64 %0)
  %5 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double> undef, <vscale x 1 x double> %2, <vscale x 1 x double> %4, i64 7, i64 %0)
  %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
  %6 = bitcast ptr %arrayidx2 to ptr
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> %5, ptr %6, i64 %0)
  %add = add nuw nsw i64 %i.014, %0
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; A single vector store in the loop with VL controlled by VLMAX
define void @vector_init_vlmax(i64 %N, ptr %c) {
; CHECK-LABEL: vector_init_vlmax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    blez a0, .LBB13_3
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    vsetvli a3, zero, e64, m1, ta, ma
; CHECK-NEXT:    slli a4, a3, 3
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:  .LBB13_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a1, a1, a4
; CHECK-NEXT:    blt a2, a0, .LBB13_2
; CHECK-NEXT:  .LBB13_3: # %for.end
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0)
  %cmp13 = icmp sgt i64 %N, 0
  br i1 %cmp13, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
  %addr = bitcast ptr %arrayidx2 to ptr
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 %0)
  %add = add nuw nsw i64 %i.014, %0
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; Same as above, but VL comes from a user-provided AVL value
define void @vector_init_vsetvli_N(i64 %N, ptr %c) {
; CHECK-LABEL: vector_init_vsetvli_N:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    blez a0, .LBB14_3
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    vsetvli a3, a0, e64, m1, ta, ma
; CHECK-NEXT:    slli a4, a3, 3
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:  .LBB14_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a1, a1, a4
; CHECK-NEXT:    blt a2, a0, .LBB14_2
; CHECK-NEXT:  .LBB14_3: # %for.end
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %N, i64 3, i64 0)
  %cmp13 = icmp sgt i64 %N, 0
  br i1 %cmp13, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
  %addr = bitcast ptr %arrayidx2 to ptr
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 %0)
  %add = add nuw nsw i64 %i.014, %0
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; Same as above, but VL is a hard coded constant (in the preheader)
define void @vector_init_vsetvli_fv(i64 %N, ptr %c) {
; CHECK-LABEL: vector_init_vsetvli_fv:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    vsetivli a3, 4, e64, m1, ta, ma
; CHECK-NEXT:    slli a4, a3, 3
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:  .LBB15_1: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a1, a1, a4
; CHECK-NEXT:    blt a2, a0, .LBB15_1
; CHECK-NEXT:  # %bb.2: # %for.end
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 4, i64 3, i64 0)
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
  %addr = bitcast ptr %arrayidx2 to ptr
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 %0)
  %add = add nuw nsw i64 %i.014, %0
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret void
}

; Same as above, but the result of the vsetvli in the preheader isn't used,
; and the constant is repeated in the loop
define void @vector_init_vsetvli_fv2(i64 %N, ptr %c) {
; CHECK-LABEL: vector_init_vsetvli_fv2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:  .LBB16_1: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    addi a2, a2, 4
; CHECK-NEXT:    addi a1, a1, 32
; CHECK-NEXT:    blt a2, a0, .LBB16_1
; CHECK-NEXT:  # %bb.2: # %for.end
; CHECK-NEXT:    ret
entry:
  tail call i64 @llvm.riscv.vsetvli(i64 4, i64 3, i64 0)
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
  %addr = bitcast ptr %arrayidx2 to ptr
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 4)
  %add = add nuw nsw i64 %i.014, 4
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret void
}

; Same as above, but AVL is only specified on the store intrinsic
; This case will require some form of hoisting or PRE
define void @vector_init_vsetvli_fv3(i64 %N, ptr %c) {
; CHECK-LABEL: vector_init_vsetvli_fv3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:  .LBB17_1: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    addi a2, a2, 4
; CHECK-NEXT:    addi a1, a1, 32
; CHECK-NEXT:    blt a2, a0, .LBB17_1
; CHECK-NEXT:  # %bb.2: # %for.end
; CHECK-NEXT:    ret
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
  %addr = bitcast ptr %arrayidx2 to ptr
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 4)
  %add = add nuw nsw i64 %i.014, 4
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret void
}

; Demonstrates a case where mutation in phase3 is problematic.  We mutate the
; vsetvli without considering that it changes the compatibility result of the
; vadd in the second block.
define <vscale x 4 x i32> @cross_block_mutate(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b,
; CHECK-LABEL: cross_block_mutate:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli a0, 6, e32, m2, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    ret
                                         <vscale x 4 x i1> %mask) {
entry:
  %vl = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 1, i64 0)
  %vl.trunc = trunc i64 %vl to i32
  %a.mod = insertelement <vscale x 4 x i32> %a, i32 %vl.trunc, i32 0
  br label %fallthrough

fallthrough:
  %res = call <vscale x 4 x i32> @llvm.riscv.vadd.mask.nxv4i32.nxv4i32(
               <vscale x 4 x i32> undef, <vscale x 4 x i32> %a.mod,
               <vscale x 4 x i32> %b, <vscale x 4 x i1> %mask, i64 %vl, i64 0)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i32> @pre_lmul(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i1 %cond) nounwind {
; CHECK-LABEL: pre_lmul:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %vl = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0)
  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %vl)
  br i1 %cond, label %if, label %if.end

if:
  ; Deliberately change vtype - this could be an unknown call, but the broader
  ; code quality is distractingly bad
  tail call i64 @llvm.riscv.vsetvlimax.i64(i64 2, i64 1)
  br label %if.end

if.end:
  %b = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %y, i64 %vl)
  ret <vscale x 2 x i32> %b
}

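; Stores should be handled consistently across blocks: vs1r.v ignores vtype
; entirely, and a unit-stride store encodes its EEW in the opcode and only
; demands the SEW/LMUL ratio from vtype, so the single vsetvli in the entry
; block should also cover the e32 store in if.then.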
define <vscale x 1 x double> @compat_store_consistency(i1 %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b, ptr %p1, <vscale x 1 x float> %c, ptr %p2) {
; CHECK-LABEL: compat_store_consistency:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a0, a0, 1
; CHECK-NEXT:    vsetvli a3, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    vs1r.v v8, (a1)
; CHECK-NEXT:    beqz a0, .LBB20_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    vse32.v v10, (a2)
; CHECK-NEXT:  .LBB20_2: # %if.end
; CHECK-NEXT:    ret
entry:
  %res = fadd <vscale x 1 x double> %a, %b
  store <vscale x 1 x double> %res, ptr %p1
  br i1 %cond, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  store <vscale x 1 x float> %c, ptr %p2
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  ret <vscale x 1 x double> %res
}

; The next two tests (which are the same except for swapped block order) make
; sure that the demanded-fields reasoning around vmv.s.x correctly handles a
; forward state with only a valid SEWLMULRatio.  We previously had a crash bug
; in this case.
define <vscale x 2 x i32> @test_ratio_only_vmv_s_x(ptr %x, ptr %y, i1 %cond) nounwind {
; CHECK-LABEL: test_ratio_only_vmv_s_x:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a2, a2, 1
; CHECK-NEXT:    beqz a2, .LBB21_2
; CHECK-NEXT:  # %bb.1: # %if
; CHECK-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vwcvt.x.x.v v8, v9
; CHECK-NEXT:    j .LBB21_3
; CHECK-NEXT:  .LBB21_2:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:  .LBB21_3: # %if.end
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, zero
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, ptr %x, i64 2)
  br i1 %cond, label %if, label %if.end

if:
  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %y, i64 2)
  %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i16> %b, i16 0, i64 2)
  br label %if.end

if.end:
  %d = phi <vscale x 2 x i32> [ %a, %entry ], [ %c, %if ]
  %e = insertelement <vscale x 2 x i32> %d, i32 0, i32 0
  ret <vscale x 2 x i32> %e
}

define <vscale x 2 x i32> @test_ratio_only_vmv_s_x2(ptr %x, ptr %y, i1 %cond) nounwind {
; CHECK-LABEL: test_ratio_only_vmv_s_x2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a2, a2, 1
; CHECK-NEXT:    beqz a2, .LBB22_2
; CHECK-NEXT:  # %bb.1: # %if
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    j .LBB22_3
; CHECK-NEXT:  .LBB22_2:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vwcvt.x.x.v v8, v9
; CHECK-NEXT:  .LBB22_3: # %if.end
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, zero
; CHECK-NEXT:    ret
entry:
  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %y, i64 2)
  %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i16> %b, i16 0, i64 2)
  br i1 %cond, label %if, label %if.end

if:
  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, ptr %x, i64 2)
  br label %if.end

if.end:
  %d = phi <vscale x 2 x i32> [ %a, %if ], [ %c, %entry ]
  %e = insertelement <vscale x 2 x i32> %d, i32 0, i32 0
  ret <vscale x 2 x i32> %e
}

; This demonstrates a PRE case where the first instruction in the block
; doesn't require a state transition.
define void @pre_over_vle(ptr %A) {
; CHECK-LABEL: pre_over_vle:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi a1, a0, 800
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:  .LBB23_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsext.vf4 v9, v8
; CHECK-NEXT:    vse32.v v9, (a0)
; CHECK-NEXT:    addi a0, a0, 8
; CHECK-NEXT:    bne a0, a1, .LBB23_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    ret
entry:
  br label %vector.body

vector.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]
  %addr = getelementptr inbounds <2 x i32>, ptr %A, i64 %iv
  %v = load <2 x i8>, ptr %addr
  %v2 = sext <2 x i8> %v to <2 x i32>
  store <2 x i32> %v2, ptr %addr
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv.next, 100
  br i1 %cmp, label %vector.body, label %exit
exit:
  ret void
}

declare i64 @llvm.riscv.vsetvlimax.i64(i64, i64)
declare <vscale x 1 x double> @llvm.riscv.vle.nxv1f64.i64(<vscale x 1 x double>, ptr nocapture, i64)
declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)
declare void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double>, ptr nocapture, i64)
declare <vscale x 4 x i32> @llvm.riscv.vadd.mask.nxv4i32.nxv4i32(
  <vscale x 4 x i32>,
  <vscale x 4 x i32>,
  <vscale x 4 x i32>,
  <vscale x 4 x i1>,
  i64,
  i64);

; Normally a pseudo's AVL is already live in its block, so it will already be
; live where we're inserting the vsetvli, before the pseudo.  In some cases the
; AVL can be from a predecessor block, so make sure we extend its live range
; across blocks.
define <vscale x 2 x i32> @cross_block_avl_extend(i64 %avl, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
; CHECK-LABEL: cross_block_avl_extend:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vadd.vv v9, v8, v9
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  ; Get the output vl from a vsetvli
  %vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 0)
  ; Force a vsetvli toggle so we need to insert a new vsetvli in exit
  %d = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b, i64 1)
  br label %exit
exit:
  ; The use of the vl from the vsetvli will be replaced with its %avl because
  ; VLMAX is the same. So %avl, which was previously only live in %entry, will
  ; need to be extended down to %exit.
  %c = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %d, i64 %vl)
  ret <vscale x 2 x i32> %c
}

define void @cross_block_avl_extend_backwards(i1 %cond, <vscale x 8 x i8> %v, ptr %p, i64 %avl) {
; CHECK-LABEL: cross_block_avl_extend_backwards:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a0, a0, 1
; CHECK-NEXT:    beqz a0, .LBB25_2
; CHECK-NEXT:  # %bb.1: # %exit
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB25_2: # %bar
; CHECK-NEXT:    addi a2, a2, 1
; CHECK-NEXT:  .LBB25_3: # %foo
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    j .LBB25_3
entry:
  br i1 %cond, label %exit, label %bar
foo:
  ; Force a vl toggle
  call void @llvm.riscv.vse.nxv8i8.i64(<vscale x 8 x i8> %v, ptr %p, i64 1)
  ; %add's LiveRange needs to be extended backwards to here.
  call void @llvm.riscv.vse.nxv8i8.i64(<vscale x 8 x i8> %v, ptr %p, i64 %add)
  br label %foo
exit:
  ret void
bar:
  %add = add i64 %avl, 1
  br label %foo
}

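; Both predecessors establish the same vtype (e8, m1) from different AVLs, and
; the phi of their VL outputs feeds the AVL of the vmv.v.x, so the exit block
; should be able to reuse the incoming VL without another vsetvli.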
define void @vlmax_avl_phi(i1 %cmp, ptr %p, i64 %a, i64 %b) {
; CHECK-LABEL: vlmax_avl_phi:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a0, a0, 1
; CHECK-NEXT:    beqz a0, .LBB26_2
; CHECK-NEXT:  # %bb.1: # %foo
; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT:    j .LBB26_3
; CHECK-NEXT:  .LBB26_2: # %bar
; CHECK-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
; CHECK-NEXT:  .LBB26_3: # %exit
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
entry:
  br i1 %cmp, label %foo, label %bar

foo:
  %vl.foo = tail call i64 @llvm.riscv.vsetvli.i64(i64 %a, i64 0, i64 0)
  br label %exit

bar:
  %vl.bar = tail call i64 @llvm.riscv.vsetvli.i64(i64 %b, i64 0, i64 0)
  br label %exit

exit:
  %phivl = phi i64 [ %vl.foo, %foo ], [ %vl.bar, %bar ]
  %1 = tail call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> poison, i8 0, i64 %phivl)
  call void @llvm.riscv.vse.nxv8i8(<vscale x 8 x i8> %1, ptr %p, i64 1)
  ret void
}

; Check that if we forward an AVL whose value is clobbered in its LiveInterval
; we emit a copy instead.
define <vscale x 4 x i32> @clobbered_forwarded_avl(i64 %n, <vscale x 4 x i32> %v, i1 %cmp) {
; CHECK-LABEL: clobbered_forwarded_avl:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:  .LBB27_1: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    bnez a1, .LBB27_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    vadd.vv v10, v8, v8
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 1)
  br label %for.body

for.body:
  ; Use %n in a PHI here so its virtual register is assigned a second time here.
  %1 = phi i64 [ %3, %for.body ], [ %n, %entry ]
  %2 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %1, i64 0, i64 0)
  %3 = add i64 %1, 1
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  %4 = tail call <vscale x 4 x i32> @llvm.riscv.vadd.nxv2f32.nxv2f32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32> %v, <vscale x 4 x i32> %v, i64 -1)
  ; VL toggle needed here: if the %n AVL was forwarded here we wouldn't be able
  ; to extend its LiveInterval because it would clobber the assignment at %1.
  %5 = tail call <vscale x 4 x i32> @llvm.riscv.vadd.nxv2f32.nxv2f32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32> %4, <vscale x 4 x i32> %v, i64 %0)
  ret <vscale x 4 x i32> %5
}
1156