; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2

; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64

declare <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  iXLen, iXLen);
declare <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  iXLen, iXLen);

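; The second-to-last iXLen operand of these intrinsics selects the fixed-point
; rounding mode written to the vxrm CSR before the instruction executes.
; Per the RVV spec: 0 = rnu (round-to-nearest-up), 1 = rne
; (round-to-nearest-even), 2 = rdn (round-down), 3 = rod (round-to-odd).
; The last operand is the AVL.
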
; Test same rounding mode in one block.
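; Both vaadd calls request mode 0, so a single csrwi vxrm at the top of the
; block covers both instructions.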
define <vscale x 1 x i8> @test1(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %b
}

; Test two different rounding modes in one block.
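; The first vaadd requests mode 2 and the second mode 0, so a second csrwi
; is needed between the two instructions.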
define <vscale x 1 x i8> @test2(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 2, iXLen %3)
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %b
}

declare <vscale x 1 x i8> @foo(<vscale x 1 x i8>)

; Test same vxrm with a call in between which may invalidate vxrm.
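; foo may change vxrm, so mode 0 must be rewritten after the call returns
; even though both vaadds request the same mode.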
define <vscale x 1 x i8> @test3(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; RV32-LABEL: test3:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    mv s0, a0
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vs1r.v v10, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    csrwi vxrm, 0
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vaadd.vv v8, v8, v9
; RV32-NEXT:    call foo
; RV32-NEXT:    csrwi vxrm, 0
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, s0, e8, mf8, ta, ma
; RV32-NEXT:    vaadd.vv v8, v8, v9
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    ret
;
; RV64-LABEL: test3:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    addi sp, sp, -32
; RV64-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    sub sp, sp, a1
; RV64-NEXT:    mv s0, a0
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vs1r.v v10, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    csrwi vxrm, 0
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vaadd.vv v8, v8, v9
; RV64-NEXT:    call foo
; RV64-NEXT:    csrwi vxrm, 0
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, s0, e8, mf8, ta, ma
; RV64-NEXT:    vaadd.vv v8, v8, v9
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 32
; RV64-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> @foo(<vscale x 1 x i8> %a)
  %c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %b,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %c
}

; Test same vxrm with inline asm in between which may invalidate vxrm.
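; Like the call in test3, the inline asm may change vxrm, so mode 0 is
; rewritten after the #APP/#NO_APP block.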
define <vscale x 1 x i8> @test4(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    #APP
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> asm "", "=^vr,0"(<vscale x 1 x i8> %a)
  %c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %b,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in triangle.
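; The write in entry dominates condblock, so the conditional path needs no
; csrwi of its own.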
define <vscale x 1 x i8> @test5(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    beqz a1, .LBB4_2
; CHECK-NEXT:  # %bb.1: # %condblock
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:  .LBB4_2: # %mergeblock
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br i1 %cond, label %condblock, label %mergeblock

condblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %entry], [%b, %condblock]

  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in diamond with no dominating vxrm.
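; Neither arm is dominated by an existing write, but both request mode 0,
; so the write can be hoisted into entry and performed once.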
define <vscale x 1 x i8> @test6(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    beqz a1, .LBB5_2
; CHECK-NEXT:  # %bb.1: # %trueblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB5_2: # %falseblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]

  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in diamond with same dominating vxrm.
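; Entry writes mode 0 for its own vaadd; both arms request the same mode and
; reuse that value without another csrwi.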
define <vscale x 1 x i8> @test7(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test7:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    beqz a1, .LBB6_2
; CHECK-NEXT:  # %bb.1: # %trueblock
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB6_2: # %falseblock
; CHECK-NEXT:    vasub.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %c = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %d = phi <vscale x 1 x i8> [%b, %trueblock], [%c, %falseblock]

  ret <vscale x 1 x i8> %d
}

; Test same rounding mode in diamond with same vxrm at merge.
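; All three blocks request mode 0, so the single csrwi in entry suffices; as
; the CHECK lines show, the mergeblock vaadd ends up duplicated into both
; arms.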
define <vscale x 1 x i8> @test8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    beqz a1, .LBB7_2
; CHECK-NEXT:  # %bb.1: # %trueblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB7_2: # %falseblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vasub.vv v8, v8, v9
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %c,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %d
}

; Test same rounding mode in diamond with different vxrm at merge.
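; Both arms request mode 0, covered by the write hoisted into entry; the
; merge block requests mode 2 and needs exactly one more csrwi.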
define <vscale x 1 x i8> @test9(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test9:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    beqz a1, .LBB8_2
; CHECK-NEXT:  # %bb.1: # %trueblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    j .LBB8_3
; CHECK-NEXT:  .LBB8_2: # %falseblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vasub.vv v8, v8, v9
; CHECK-NEXT:  .LBB8_3: # %mergeblock
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %c,
    <vscale x 1 x i8> %2,
    iXLen 2, iXLen %3)

  ret <vscale x 1 x i8> %d
}

; Test loop with no dominating vxrm write.
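; The vaadd in the loop always requests mode 2, so the write should be
; hoisted into the preheader and execute once instead of on every iteration.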
define void @test10(ptr nocapture %ptr_dest, ptr nocapture readonly %ptr_op1, ptr nocapture readonly %ptr_op2, iXLen %n) {
; CHECK-LABEL: test10:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    beqz a3, .LBB9_3
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:  .LBB9_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a2)
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    sub a3, a3, a4
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    bnez a3, .LBB9_2
; CHECK-NEXT:  .LBB9_3: # %for.end
; CHECK-NEXT:    ret
entry:
  %tobool.not9 = icmp eq iXLen %n, 0
  br i1 %tobool.not9, label %for.end, label %for.body

for.body:
  %n.addr.011 = phi iXLen [ %n, %entry ], [ %sub, %for.body ]
  %vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5)
  %load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl)
  %load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl)
  %vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl)
  %sub = sub iXLen %n.addr.011, %vl
  %tobool.not = icmp eq iXLen %sub, 0
  br i1 %tobool.not, label %for.end, label %for.body

for.end:
  ret void
}

declare iXLen @llvm.riscv.vsetvli.iXLen(iXLen, iXLen immarg, iXLen immarg)
declare <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8>, ptr nocapture, iXLen)
declare void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8>, ptr nocapture, iXLen)

; Test loop with dominating vxrm write. Make sure there is no write in the loop.
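; Every vaadd requests mode 2 and the write in entry dominates the rotated
; loop body, so no csrwi appears inside the loop.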
define void @test11(ptr nocapture %ptr_dest, ptr nocapture readonly %ptr_op1, ptr nocapture readonly %ptr_op2, iXLen %n) {
; CHECK-LABEL: test11:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a2)
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:  .LBB10_1: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    sub a3, a3, a4
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    beqz a3, .LBB10_3
; CHECK-NEXT:  # %bb.2: # %for.body
; CHECK-NEXT:    # in Loop: Header=BB10_1 Depth=1
; CHECK-NEXT:    vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a2)
; CHECK-NEXT:    j .LBB10_1
; CHECK-NEXT:  .LBB10_3: # %for.end
; CHECK-NEXT:    ret
entry:
  %vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n, iXLen 0, iXLen 5)
  %load1a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl)
  %load2a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl)
  %vadda = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1a, <vscale x 1 x i8> %load2a, iXLen 2, iXLen %vl)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadda, ptr %ptr_dest, iXLen %vl)
  %suba = sub iXLen %n, %vl
  %tobool.not9 = icmp eq iXLen %suba, 0
  br i1 %tobool.not9, label %for.end, label %for.body

for.body:
  %n.addr.011 = phi iXLen [ %suba, %entry ], [ %sub, %for.body ]
  %vl2 = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5)
  %load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl2)
  %load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl2)
  %vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl2)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl2)
  %sub = sub iXLen %n.addr.011, %vl2
  %tobool.not = icmp eq iXLen %sub, 0
  br i1 %tobool.not, label %for.end, label %for.body

for.end:
  ret void
}

; The edge from entry to block2 is a critical edge. The vxrm write in block2
; is redundant when coming from block1, but is needed when coming from entry.
; FIXME: We could remove the write from the end of block1 without splitting the
; critical edge.
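; As the CHECK lines show, block1 currently ends with csrwi vxrm, 2 and block2
; begins with the same write, so the block1 path performs it twice.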
define <vscale x 1 x i8> @test12(i1 %c1, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) {
; CHECK-LABEL: test12:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a0, a0, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v9, v8, v9
; CHECK-NEXT:    beqz a0, .LBB11_2
; CHECK-NEXT:  # %bb.1: # %block1
; CHECK-NEXT:    csrwi vxrm, 1
; CHECK-NEXT:    vaadd.vv v9, v8, v9
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:  .LBB11_2: # %block2
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl)
  br i1 %c1, label %block1, label %block2

block1:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl)
  br label %block2

block2:
  %c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %d
}

; Similar to test12, but block1 now ends in a conditional branch to block2 or
; block3, which makes the edge from block1 to block2 a second critical edge.
; The write to vxrm at the end of block1 can't be removed because it is
; needed by block3.
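; As the CHECK lines show, block3 contains no csrwi of its own and relies on
; the csrwi vxrm, 2 at the end of block1, while block2 still needs its own
; write for the path from entry.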
define <vscale x 1 x i8> @test13(i1 %c1, i1 %c2, i1 %c3, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) {
; CHECK-LABEL: test13:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a0, a0, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a3, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v10, v8, v9
; CHECK-NEXT:    beqz a0, .LBB12_2
; CHECK-NEXT:  # %bb.1: # %block1
; CHECK-NEXT:    csrwi vxrm, 1
; CHECK-NEXT:    vaadd.vv v10, v8, v10
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    beqz a1, .LBB12_3
; CHECK-NEXT:  .LBB12_2: # %block2
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB12_3: # %block3
; CHECK-NEXT:    vaadd.vv v8, v9, v10
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl)
  br i1 %c1, label %block1, label %block2

block1:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl)
  br i1 %c2, label %block2, label %block3

block2:
  %c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %d

block3:
  %e = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %1, <vscale x 1 x i8> %b, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %e
}
546