; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN

; vp.rint on nxv1bf16: no native bf16 FP ops, so codegen widens to f32
; (vfwcvtbf16), rounds via masked vfcvt.x.f.v/vfcvt.f.x.v with a |x| < 2^23
; threshold (lui 307200), restores sign, and narrows back (vfncvtbf16).
declare <vscale x 1 x bfloat> @llvm.vp.rint.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)

define <vscale x 1 x bfloat> @vp_rint_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vmv1r.v v8, v0
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v11, v10, v0.t
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vmflt.vf v8, v11, fa5, v0.t
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v11, v10, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v11, v11, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
; CHECK-NEXT:    vmv1r.v v0, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.rint.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

define <vscale x 1 x bfloat> @vp_rint_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv1bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v8, v9
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.rint.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

; vp.rint on nxv2bf16: same widen-to-f32 / vfcvt round-trip / narrow-back
; expansion as nxv1bf16, at e32,m1.
declare <vscale x 2 x bfloat> @llvm.vp.rint.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)

define <vscale x 2 x bfloat> @vp_rint_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vmv1r.v v8, v0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v11, v10, v0.t
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmflt.vf v8, v11, fa5, v0.t
; CHECK-NEXT:    vmv.v.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v11, v10, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v11, v11, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
; CHECK-NEXT:    vmv1r.v v0, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.rint.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

define <vscale x 2 x bfloat> @vp_rint_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv2bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v8, v9
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.rint.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

; vp.rint on nxv4bf16: widen-to-f32 expansion at e32,m2.
declare <vscale x 4 x bfloat> @llvm.vp.rint.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)

define <vscale x 4 x bfloat> @vp_rint_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vmv1r.v v8, v0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v12, v10, v0.t
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v12, v10, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
; CHECK-NEXT:    vmv1r.v v0, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.rint.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

define <vscale x 4 x bfloat> @vp_rint_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv4bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v8, v10
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.rint.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

; vp.rint on nxv8bf16: widen-to-f32 expansion at e32,m4.
declare <vscale x 8 x bfloat> @llvm.vp.rint.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)

define <vscale x 8 x bfloat> @vp_rint_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vmv1r.v v8, v0
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v16, v12, v0.t
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vmflt.vf v8, v16, fa5, v0.t
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v12, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.rint.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

define <vscale x 8 x bfloat> @vp_rint_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv8bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v8, v12
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.rint.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

; vp.rint on nxv16bf16: widen-to-f32 expansion at e32,m8.
declare <vscale x 16 x bfloat> @llvm.vp.rint.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)

define <vscale x 16 x bfloat> @vp_rint_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vmv1r.v v12, v0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vmv1r.v v8, v0
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16, v0.t
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v8, v24, fa5, v0.t
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x bfloat> @llvm.vp.rint.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x bfloat> %v
}

define <vscale x 16 x bfloat> @vp_rint_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv16bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v16
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x bfloat> @llvm.vp.rint.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x bfloat> %v
}

; vp.rint on nxv32bf16: the f32-widened value would need LMUL=16, so codegen
; splits the vector in half (EVL split via sltu/addi/and), spills v8-v15 to a
; vlenb-sized stack slot, and runs the widen/round/narrow sequence per half.
declare <vscale x 32 x bfloat> @llvm.vp.rint.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)

define <vscale x 32 x bfloat> @vp_rint_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmv1r.v v7, v0
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    lui a3, 307200
; CHECK-NEXT:    slli a1, a2, 1
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    fmv.w.x fa5, a3
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    vslidedown.vx v17, v0, a2
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    vmv1r.v v18, v17
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    vmv1r.v v0, v17
; CHECK-NEXT:    addi a3, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v18, v8, fa5, v0.t
; CHECK-NEXT:    vmv1r.v v0, v18
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v8, v24, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
; CHECK-NEXT:    vmv1r.v v0, v17
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB10_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB10_2:
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16, v0.t
; CHECK-NEXT:    vmv1r.v v8, v7
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v8, v16, fa5, v0.t
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x bfloat> @llvm.vp.rint.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x bfloat> %v
}

define <vscale x 32 x bfloat> @vp_rint_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv32bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT:    vmset.m v16
; CHECK-NEXT:    lui a3, 307200
; CHECK-NEXT:    slli a1, a2, 1
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    fmv.w.x fa5, a3
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v16, a2
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    vmv1r.v v17, v16
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    addi a3, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v17, v8, fa5, v0.t
; CHECK-NEXT:    vmv1r.v v0, v17
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v8, v24, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB11_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB11_2:
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16
; CHECK-NEXT:    vmflt.vf v0, v24, fa5
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x bfloat> @llvm.vp.rint.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x bfloat> %v
}
; vp.rint on nxv1f16: ZVFH operates on f16 directly with the threshold loaded
; from a constant pool (.LCPI*); ZVFHMIN widens to f32 like the bf16 cases.
declare <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)

define <vscale x 1 x half> @vp_rint_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI12_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI12_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_rint_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v9, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v11, v10, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v11, fa5, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v11, v10, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v11, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %v
}

define <vscale x 1 x half> @vp_rint_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv1f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI13_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI13_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_rint_nxv1f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x half> %v
}

; vp.rint on nxv2f16: ZVFH native f16 path; ZVFHMIN widens to f32 at e32,m1.
declare <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vp_rint_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI14_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI14_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_rint_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v9, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v11, v10, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v11, fa5, v0.t
; ZVFHMIN-NEXT:    vmv.v.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v11, v10, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v11, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x half> %v
}

define <vscale x 2 x half> @vp_rint_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv2f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI15_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI15_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_rint_nxv2f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x half> %v
}

; vp.rint on nxv4f16: ZVFH native f16 path; ZVFHMIN widens to f32 at e32,m2.
declare <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)

define <vscale x 4 x half> @vp_rint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI16_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI16_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_rint_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v9, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x half> %v
}

define <vscale x 4 x half> @vp_rint_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv4f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI17_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI17_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_rint_nxv4f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v10
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x half> %v
}

; vp.rint on nxv8f16: ZVFH native f16 path (mask copied since LMUL>1 needs a
; separate mask register); ZVFHMIN widens to f32 at e32,m4.
declare <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)

define <vscale x 8 x half> @vp_rint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vmv1r.v v10, v0
; ZVFH-NEXT:    lui a0, %hi(.LCPI18_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a0)
; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_rint_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v10, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vp_rint_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv8f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI19_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI19_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfabs.v v10, v8
; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_rint_nxv8f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v12
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x half> %v
}

723declare <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)
724
; Masked vp.rint on <vscale x 16 x half>. ZVFH rounds at e16/m4 using a
; constant-pool threshold (.LCPI20_0); ZVFHMIN widens to f32 at e32/m8
; (lui 307200 = 0x4B000000, i.e. 2^23, as the "already integral" cutoff),
; rounds, and narrows back under the original mask.
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
725define <vscale x 16 x half> @vp_rint_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
726; ZVFH-LABEL: vp_rint_nxv16f16:
727; ZVFH:       # %bb.0:
728; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
729; ZVFH-NEXT:    vmv1r.v v12, v0
730; ZVFH-NEXT:    lui a0, %hi(.LCPI20_0)
731; ZVFH-NEXT:    flh fa5, %lo(.LCPI20_0)(a0)
732; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
733; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
734; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
735; ZVFH-NEXT:    vmv1r.v v0, v12
736; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
737; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
738; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
739; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
740; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
741; ZVFH-NEXT:    ret
742;
743; ZVFHMIN-LABEL: vp_rint_nxv16f16:
744; ZVFHMIN:       # %bb.0:
745; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
746; ZVFHMIN-NEXT:    vmv1r.v v12, v0
747; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
748; ZVFHMIN-NEXT:    lui a0, 307200
749; ZVFHMIN-NEXT:    vmv1r.v v8, v0
750; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
751; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
752; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
753; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
754; ZVFHMIN-NEXT:    vmflt.vf v8, v24, fa5, v0.t
755; ZVFHMIN-NEXT:    vmv1r.v v0, v8
756; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
757; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
758; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
759; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
760; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
761; ZVFHMIN-NEXT:    vmv1r.v v0, v12
762; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
763; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
764; ZVFHMIN-NEXT:    ret
765  %v = call <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
766  ret <vscale x 16 x half> %v
767}
768
; Unmasked (all-true mask) vp.rint on <vscale x 16 x half>. Same shape as the
; masked variant but without mask-register shuffling.
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
769define <vscale x 16 x half> @vp_rint_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
770; ZVFH-LABEL: vp_rint_nxv16f16_unmasked:
771; ZVFH:       # %bb.0:
772; ZVFH-NEXT:    lui a1, %hi(.LCPI21_0)
773; ZVFH-NEXT:    flh fa5, %lo(.LCPI21_0)(a1)
774; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
775; ZVFH-NEXT:    vfabs.v v12, v8
776; ZVFH-NEXT:    vmflt.vf v0, v12, fa5
777; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
778; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
779; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
780; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
781; ZVFH-NEXT:    ret
782;
783; ZVFHMIN-LABEL: vp_rint_nxv16f16_unmasked:
784; ZVFHMIN:       # %bb.0:
785; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
786; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
787; ZVFHMIN-NEXT:    lui a0, 307200
788; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
789; ZVFHMIN-NEXT:    vfabs.v v8, v16
790; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
791; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
792; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
793; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
794; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
795; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
796; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
797; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
798; ZVFHMIN-NEXT:    ret
799  %v = call <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
800  ret <vscale x 16 x half> %v
801}
802
803declare <vscale x 32 x half> @llvm.vp.rint.nxv32f16(<vscale x 32 x half>, <vscale x 32 x i1>, i32)
804
; Masked vp.rint on <vscale x 32 x half>. ZVFH handles it in one pass at
; e16/m8. ZVFHMIN must widen to f32, which doubles the register demand, so the
; operation is split into two halves (high half first, then a branch on
; evl vs. 2*vlenb at .LBB22_2 for the low half) with an 8*vlenb stack spill
; slot for the source vector (vs8r.v / vl8r.v).
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
805define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
806; ZVFH-LABEL: vp_rint_nxv32f16:
807; ZVFH:       # %bb.0:
808; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
809; ZVFH-NEXT:    vmv1r.v v16, v0
810; ZVFH-NEXT:    lui a0, %hi(.LCPI22_0)
811; ZVFH-NEXT:    flh fa5, %lo(.LCPI22_0)(a0)
812; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
813; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
814; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
815; ZVFH-NEXT:    vmv1r.v v0, v16
816; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
817; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
818; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
819; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
820; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
821; ZVFH-NEXT:    ret
822;
823; ZVFHMIN-LABEL: vp_rint_nxv32f16:
824; ZVFHMIN:       # %bb.0:
825; ZVFHMIN-NEXT:    addi sp, sp, -16
826; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
827; ZVFHMIN-NEXT:    csrr a1, vlenb
828; ZVFHMIN-NEXT:    slli a1, a1, 3
829; ZVFHMIN-NEXT:    sub sp, sp, a1
830; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
831; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
832; ZVFHMIN-NEXT:    vmv1r.v v7, v0
833; ZVFHMIN-NEXT:    csrr a2, vlenb
834; ZVFHMIN-NEXT:    lui a3, 307200
835; ZVFHMIN-NEXT:    slli a1, a2, 1
836; ZVFHMIN-NEXT:    srli a2, a2, 2
837; ZVFHMIN-NEXT:    fmv.w.x fa5, a3
838; ZVFHMIN-NEXT:    sub a3, a0, a1
839; ZVFHMIN-NEXT:    vslidedown.vx v17, v0, a2
840; ZVFHMIN-NEXT:    sltu a2, a0, a3
841; ZVFHMIN-NEXT:    vmv1r.v v18, v17
842; ZVFHMIN-NEXT:    addi a2, a2, -1
843; ZVFHMIN-NEXT:    and a2, a2, a3
844; ZVFHMIN-NEXT:    vmv1r.v v0, v17
845; ZVFHMIN-NEXT:    addi a3, sp, 16
846; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
847; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
848; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
849; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
850; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
851; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
852; ZVFHMIN-NEXT:    vmflt.vf v18, v8, fa5, v0.t
853; ZVFHMIN-NEXT:    vmv1r.v v0, v18
854; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
855; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
856; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
857; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
858; ZVFHMIN-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
859; ZVFHMIN-NEXT:    vmv1r.v v0, v17
860; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
861; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
862; ZVFHMIN-NEXT:    bltu a0, a1, .LBB22_2
863; ZVFHMIN-NEXT:  # %bb.1:
864; ZVFHMIN-NEXT:    mv a0, a1
865; ZVFHMIN-NEXT:  .LBB22_2:
866; ZVFHMIN-NEXT:    vmv1r.v v0, v7
867; ZVFHMIN-NEXT:    addi a1, sp, 16
868; ZVFHMIN-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
869; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
870; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
871; ZVFHMIN-NEXT:    vmv1r.v v8, v7
872; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
873; ZVFHMIN-NEXT:    vfabs.v v16, v24, v0.t
874; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
875; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
876; ZVFHMIN-NEXT:    vmv1r.v v0, v8
877; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
878; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v24, v0.t
879; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
880; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
881; ZVFHMIN-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
882; ZVFHMIN-NEXT:    vmv1r.v v0, v7
883; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
884; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24, v0.t
885; ZVFHMIN-NEXT:    csrr a0, vlenb
886; ZVFHMIN-NEXT:    slli a0, a0, 3
887; ZVFHMIN-NEXT:    add sp, sp, a0
888; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
889; ZVFHMIN-NEXT:    addi sp, sp, 16
890; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
891; ZVFHMIN-NEXT:    ret
892  %v = call <vscale x 32 x half> @llvm.vp.rint.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
893  ret <vscale x 32 x half> %v
894}
895
; Unmasked vp.rint on <vscale x 32 x half>. ZVFHMIN still splits in two (see
; the masked variant) but materializes the all-ones mask with vmset.m for the
; high half, and the low half runs fully unmasked after the .LBB23_2 branch.
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
896define <vscale x 32 x half> @vp_rint_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
897; ZVFH-LABEL: vp_rint_nxv32f16_unmasked:
898; ZVFH:       # %bb.0:
899; ZVFH-NEXT:    lui a1, %hi(.LCPI23_0)
900; ZVFH-NEXT:    flh fa5, %lo(.LCPI23_0)(a1)
901; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
902; ZVFH-NEXT:    vfabs.v v16, v8
903; ZVFH-NEXT:    vmflt.vf v0, v16, fa5
904; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
905; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
906; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
907; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
908; ZVFH-NEXT:    ret
909;
910; ZVFHMIN-LABEL: vp_rint_nxv32f16_unmasked:
911; ZVFHMIN:       # %bb.0:
912; ZVFHMIN-NEXT:    addi sp, sp, -16
913; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
914; ZVFHMIN-NEXT:    csrr a1, vlenb
915; ZVFHMIN-NEXT:    slli a1, a1, 3
916; ZVFHMIN-NEXT:    sub sp, sp, a1
917; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
918; ZVFHMIN-NEXT:    csrr a2, vlenb
919; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
920; ZVFHMIN-NEXT:    vmset.m v16
921; ZVFHMIN-NEXT:    lui a3, 307200
922; ZVFHMIN-NEXT:    slli a1, a2, 1
923; ZVFHMIN-NEXT:    srli a2, a2, 2
924; ZVFHMIN-NEXT:    fmv.w.x fa5, a3
925; ZVFHMIN-NEXT:    sub a3, a0, a1
926; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
927; ZVFHMIN-NEXT:    vslidedown.vx v16, v16, a2
928; ZVFHMIN-NEXT:    sltu a2, a0, a3
929; ZVFHMIN-NEXT:    vmv1r.v v17, v16
930; ZVFHMIN-NEXT:    addi a2, a2, -1
931; ZVFHMIN-NEXT:    and a2, a2, a3
932; ZVFHMIN-NEXT:    vmv1r.v v0, v16
933; ZVFHMIN-NEXT:    addi a3, sp, 16
934; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
935; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
936; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
937; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
938; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
939; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
940; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
941; ZVFHMIN-NEXT:    vmv1r.v v0, v17
942; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
943; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
944; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
945; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
946; ZVFHMIN-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
947; ZVFHMIN-NEXT:    vmv1r.v v0, v16
948; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
949; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
950; ZVFHMIN-NEXT:    bltu a0, a1, .LBB23_2
951; ZVFHMIN-NEXT:  # %bb.1:
952; ZVFHMIN-NEXT:    mv a0, a1
953; ZVFHMIN-NEXT:  .LBB23_2:
954; ZVFHMIN-NEXT:    addi a1, sp, 16
955; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
956; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
957; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
958; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
959; ZVFHMIN-NEXT:    vfabs.v v24, v16
960; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
961; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
962; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
963; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
964; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
965; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
966; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
967; ZVFHMIN-NEXT:    csrr a0, vlenb
968; ZVFHMIN-NEXT:    slli a0, a0, 3
969; ZVFHMIN-NEXT:    add sp, sp, a0
970; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
971; ZVFHMIN-NEXT:    addi sp, sp, 16
972; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
973; ZVFHMIN-NEXT:    ret
974  %v = call <vscale x 32 x half> @llvm.vp.rint.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
975  ret <vscale x 32 x half> %v
976}
977
978declare <vscale x 1 x float> @llvm.vp.rint.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
979
; Masked vp.rint on <vscale x 1 x float>. One CHECK prefix: the ZVFH and
; ZVFHMIN runs produce identical code for f32. Threshold is 2^23 (lui 307200
; = 0x4B000000), above which an f32 is already integral.
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
980define <vscale x 1 x float> @vp_rint_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
981; CHECK-LABEL: vp_rint_nxv1f32:
982; CHECK:       # %bb.0:
983; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
984; CHECK-NEXT:    vfabs.v v9, v8, v0.t
985; CHECK-NEXT:    lui a0, 307200
986; CHECK-NEXT:    fmv.w.x fa5, a0
987; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
988; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
989; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
990; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
991; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
992; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
993; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
994; CHECK-NEXT:    ret
995  %v = call <vscale x 1 x float> @llvm.vp.rint.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
996  ret <vscale x 1 x float> %v
997}
998
; Unmasked vp.rint on <vscale x 1 x float> (all-true mask).
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
999define <vscale x 1 x float> @vp_rint_nxv1f32_unmasked(<vscale x 1 x float> %va, i32 zeroext %evl) {
1000; CHECK-LABEL: vp_rint_nxv1f32_unmasked:
1001; CHECK:       # %bb.0:
1002; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
1003; CHECK-NEXT:    vfabs.v v9, v8
1004; CHECK-NEXT:    lui a0, 307200
1005; CHECK-NEXT:    fmv.w.x fa5, a0
1006; CHECK-NEXT:    vmflt.vf v0, v9, fa5
1007; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
1008; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
1009; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
1010; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
1011; CHECK-NEXT:    ret
1012  %v = call <vscale x 1 x float> @llvm.vp.rint.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
1013  ret <vscale x 1 x float> %v
1014}
1015
1016declare <vscale x 2 x float> @llvm.vp.rint.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
1017
; Masked vp.rint on <vscale x 2 x float> (e32/m1).
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1018define <vscale x 2 x float> @vp_rint_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1019; CHECK-LABEL: vp_rint_nxv2f32:
1020; CHECK:       # %bb.0:
1021; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
1022; CHECK-NEXT:    vfabs.v v9, v8, v0.t
1023; CHECK-NEXT:    lui a0, 307200
1024; CHECK-NEXT:    fmv.w.x fa5, a0
1025; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
1026; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
1027; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
1028; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
1029; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
1030; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
1031; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
1032; CHECK-NEXT:    ret
1033  %v = call <vscale x 2 x float> @llvm.vp.rint.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
1034  ret <vscale x 2 x float> %v
1035}
1036
; Unmasked vp.rint on <vscale x 2 x float>.
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1037define <vscale x 2 x float> @vp_rint_nxv2f32_unmasked(<vscale x 2 x float> %va, i32 zeroext %evl) {
1038; CHECK-LABEL: vp_rint_nxv2f32_unmasked:
1039; CHECK:       # %bb.0:
1040; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
1041; CHECK-NEXT:    vfabs.v v9, v8
1042; CHECK-NEXT:    lui a0, 307200
1043; CHECK-NEXT:    fmv.w.x fa5, a0
1044; CHECK-NEXT:    vmflt.vf v0, v9, fa5
1045; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
1046; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
1047; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
1048; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
1049; CHECK-NEXT:    ret
1050  %v = call <vscale x 2 x float> @llvm.vp.rint.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
1051  ret <vscale x 2 x float> %v
1052}
1053
1054declare <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
1055
; Masked vp.rint on <vscale x 4 x float> (e32/m2). At m2 the incoming mask
; must be saved (vmv1r.v v10, v0) because vmflt writes a fresh mask register.
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1056define <vscale x 4 x float> @vp_rint_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1057; CHECK-LABEL: vp_rint_nxv4f32:
1058; CHECK:       # %bb.0:
1059; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
1060; CHECK-NEXT:    vmv1r.v v10, v0
1061; CHECK-NEXT:    vfabs.v v12, v8, v0.t
1062; CHECK-NEXT:    lui a0, 307200
1063; CHECK-NEXT:    fmv.w.x fa5, a0
1064; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
1065; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
1066; CHECK-NEXT:    vmv1r.v v0, v10
1067; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
1068; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
1069; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
1070; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
1071; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
1072; CHECK-NEXT:    ret
1073  %v = call <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
1074  ret <vscale x 4 x float> %v
1075}
1076
; Unmasked vp.rint on <vscale x 4 x float>.
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1077define <vscale x 4 x float> @vp_rint_nxv4f32_unmasked(<vscale x 4 x float> %va, i32 zeroext %evl) {
1078; CHECK-LABEL: vp_rint_nxv4f32_unmasked:
1079; CHECK:       # %bb.0:
1080; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
1081; CHECK-NEXT:    vfabs.v v10, v8
1082; CHECK-NEXT:    lui a0, 307200
1083; CHECK-NEXT:    fmv.w.x fa5, a0
1084; CHECK-NEXT:    vmflt.vf v0, v10, fa5
1085; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
1086; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
1087; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
1088; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
1089; CHECK-NEXT:    ret
1090  %v = call <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1091  ret <vscale x 4 x float> %v
1092}
1093
1094declare <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
1095
; Masked vp.rint on <vscale x 8 x float> (e32/m4).
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1096define <vscale x 8 x float> @vp_rint_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1097; CHECK-LABEL: vp_rint_nxv8f32:
1098; CHECK:       # %bb.0:
1099; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
1100; CHECK-NEXT:    vmv1r.v v12, v0
1101; CHECK-NEXT:    vfabs.v v16, v8, v0.t
1102; CHECK-NEXT:    lui a0, 307200
1103; CHECK-NEXT:    fmv.w.x fa5, a0
1104; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
1105; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
1106; CHECK-NEXT:    vmv1r.v v0, v12
1107; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
1108; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
1109; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
1110; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
1111; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
1112; CHECK-NEXT:    ret
1113  %v = call <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
1114  ret <vscale x 8 x float> %v
1115}
1116
; Unmasked vp.rint on <vscale x 8 x float>.
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1117define <vscale x 8 x float> @vp_rint_nxv8f32_unmasked(<vscale x 8 x float> %va, i32 zeroext %evl) {
1118; CHECK-LABEL: vp_rint_nxv8f32_unmasked:
1119; CHECK:       # %bb.0:
1120; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
1121; CHECK-NEXT:    vfabs.v v12, v8
1122; CHECK-NEXT:    lui a0, 307200
1123; CHECK-NEXT:    fmv.w.x fa5, a0
1124; CHECK-NEXT:    vmflt.vf v0, v12, fa5
1125; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
1126; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
1127; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
1128; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
1129; CHECK-NEXT:    ret
1130  %v = call <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
1131  ret <vscale x 8 x float> %v
1132}
1133
1134declare <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
1135
; Masked vp.rint on <vscale x 16 x float> (e32/m8, the largest f32 LMUL).
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1136define <vscale x 16 x float> @vp_rint_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1137; CHECK-LABEL: vp_rint_nxv16f32:
1138; CHECK:       # %bb.0:
1139; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
1140; CHECK-NEXT:    vmv1r.v v16, v0
1141; CHECK-NEXT:    vfabs.v v24, v8, v0.t
1142; CHECK-NEXT:    lui a0, 307200
1143; CHECK-NEXT:    fmv.w.x fa5, a0
1144; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
1145; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
1146; CHECK-NEXT:    vmv1r.v v0, v16
1147; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
1148; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
1149; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
1150; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
1151; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
1152; CHECK-NEXT:    ret
1153  %v = call <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
1154  ret <vscale x 16 x float> %v
1155}
1156
; Unmasked vp.rint on <vscale x 16 x float>.
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1157define <vscale x 16 x float> @vp_rint_nxv16f32_unmasked(<vscale x 16 x float> %va, i32 zeroext %evl) {
1158; CHECK-LABEL: vp_rint_nxv16f32_unmasked:
1159; CHECK:       # %bb.0:
1160; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
1161; CHECK-NEXT:    vfabs.v v16, v8
1162; CHECK-NEXT:    lui a0, 307200
1163; CHECK-NEXT:    fmv.w.x fa5, a0
1164; CHECK-NEXT:    vmflt.vf v0, v16, fa5
1165; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
1166; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
1167; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
1168; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
1169; CHECK-NEXT:    ret
1170  %v = call <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
1171  ret <vscale x 16 x float> %v
1172}
1173
1174declare <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
1175
; Masked vp.rint on <vscale x 1 x double>. For f64 the integral-magnitude
; threshold does not fit a lui immediate, so it is loaded from the constant
; pool (.LCPI34_0) instead of being materialized with lui/fmv.w.x.
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1176define <vscale x 1 x double> @vp_rint_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1177; CHECK-LABEL: vp_rint_nxv1f64:
1178; CHECK:       # %bb.0:
1179; CHECK-NEXT:    lui a1, %hi(.LCPI34_0)
1180; CHECK-NEXT:    fld fa5, %lo(.LCPI34_0)(a1)
1181; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1182; CHECK-NEXT:    vfabs.v v9, v8, v0.t
1183; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
1184; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
1185; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
1186; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
1187; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
1188; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
1189; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
1190; CHECK-NEXT:    ret
1191  %v = call <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
1192  ret <vscale x 1 x double> %v
1193}
1194
; Unmasked vp.rint on <vscale x 1 x double>.
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1195define <vscale x 1 x double> @vp_rint_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
1196; CHECK-LABEL: vp_rint_nxv1f64_unmasked:
1197; CHECK:       # %bb.0:
1198; CHECK-NEXT:    lui a1, %hi(.LCPI35_0)
1199; CHECK-NEXT:    fld fa5, %lo(.LCPI35_0)(a1)
1200; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1201; CHECK-NEXT:    vfabs.v v9, v8
1202; CHECK-NEXT:    vmflt.vf v0, v9, fa5
1203; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
1204; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
1205; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
1206; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
1207; CHECK-NEXT:    ret
1208  %v = call <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
1209  ret <vscale x 1 x double> %v
1210}
1211
1212declare <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
1213
; Masked vp.rint on <vscale x 2 x double> (e64/m2).
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1214define <vscale x 2 x double> @vp_rint_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1215; CHECK-LABEL: vp_rint_nxv2f64:
1216; CHECK:       # %bb.0:
1217; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1218; CHECK-NEXT:    vmv1r.v v10, v0
1219; CHECK-NEXT:    lui a0, %hi(.LCPI36_0)
1220; CHECK-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
1221; CHECK-NEXT:    vfabs.v v12, v8, v0.t
1222; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
1223; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
1224; CHECK-NEXT:    vmv1r.v v0, v10
1225; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
1226; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
1227; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
1228; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
1229; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
1230; CHECK-NEXT:    ret
1231  %v = call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
1232  ret <vscale x 2 x double> %v
1233}
1234
; Unmasked vp.rint on <vscale x 2 x double>.
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1235define <vscale x 2 x double> @vp_rint_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
1236; CHECK-LABEL: vp_rint_nxv2f64_unmasked:
1237; CHECK:       # %bb.0:
1238; CHECK-NEXT:    lui a1, %hi(.LCPI37_0)
1239; CHECK-NEXT:    fld fa5, %lo(.LCPI37_0)(a1)
1240; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1241; CHECK-NEXT:    vfabs.v v10, v8
1242; CHECK-NEXT:    vmflt.vf v0, v10, fa5
1243; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
1244; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
1245; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
1246; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
1247; CHECK-NEXT:    ret
1248  %v = call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
1249  ret <vscale x 2 x double> %v
1250}
1251
1252declare <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
1253
; Masked vp.rint on <vscale x 4 x double> (e64/m4).
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1254define <vscale x 4 x double> @vp_rint_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1255; CHECK-LABEL: vp_rint_nxv4f64:
1256; CHECK:       # %bb.0:
1257; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1258; CHECK-NEXT:    vmv1r.v v12, v0
1259; CHECK-NEXT:    lui a0, %hi(.LCPI38_0)
1260; CHECK-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
1261; CHECK-NEXT:    vfabs.v v16, v8, v0.t
1262; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
1263; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
1264; CHECK-NEXT:    vmv1r.v v0, v12
1265; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
1266; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
1267; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
1268; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
1269; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
1270; CHECK-NEXT:    ret
1271  %v = call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
1272  ret <vscale x 4 x double> %v
1273}
1274
; Unmasked vp.rint on <vscale x 4 x double>.
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1275define <vscale x 4 x double> @vp_rint_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
1276; CHECK-LABEL: vp_rint_nxv4f64_unmasked:
1277; CHECK:       # %bb.0:
1278; CHECK-NEXT:    lui a1, %hi(.LCPI39_0)
1279; CHECK-NEXT:    fld fa5, %lo(.LCPI39_0)(a1)
1280; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1281; CHECK-NEXT:    vfabs.v v12, v8
1282; CHECK-NEXT:    vmflt.vf v0, v12, fa5
1283; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
1284; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
1285; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
1286; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
1287; CHECK-NEXT:    ret
1288  %v = call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1289  ret <vscale x 4 x double> %v
1290}
1291
1292declare <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)
1293
; Masked vp.rint on the non-power-of-two type <vscale x 7 x double>; codegen
; widens it to the m8 register class (same sequence as nxv8f64 below).
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1294define <vscale x 7 x double> @vp_rint_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
1295; CHECK-LABEL: vp_rint_nxv7f64:
1296; CHECK:       # %bb.0:
1297; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1298; CHECK-NEXT:    vmv1r.v v16, v0
1299; CHECK-NEXT:    lui a0, %hi(.LCPI40_0)
1300; CHECK-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
1301; CHECK-NEXT:    vfabs.v v24, v8, v0.t
1302; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1303; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
1304; CHECK-NEXT:    vmv1r.v v0, v16
1305; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
1306; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
1307; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
1308; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1309; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
1310; CHECK-NEXT:    ret
1311  %v = call <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
1312  ret <vscale x 7 x double> %v
1313}
1314
; Unmasked vp.rint on the non-power-of-two type <vscale x 7 x double>.
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1315define <vscale x 7 x double> @vp_rint_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
1316; CHECK-LABEL: vp_rint_nxv7f64_unmasked:
1317; CHECK:       # %bb.0:
1318; CHECK-NEXT:    lui a1, %hi(.LCPI41_0)
1319; CHECK-NEXT:    fld fa5, %lo(.LCPI41_0)(a1)
1320; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1321; CHECK-NEXT:    vfabs.v v16, v8
1322; CHECK-NEXT:    vmflt.vf v0, v16, fa5
1323; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
1324; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
1325; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1326; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
1327; CHECK-NEXT:    ret
1328  %v = call <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl)
1329  ret <vscale x 7 x double> %v
1330}
1331
1332declare <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
1333
; Masked vp.rint on <vscale x 8 x double> (e64/m8).
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1334define <vscale x 8 x double> @vp_rint_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1335; CHECK-LABEL: vp_rint_nxv8f64:
1336; CHECK:       # %bb.0:
1337; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1338; CHECK-NEXT:    vmv1r.v v16, v0
1339; CHECK-NEXT:    lui a0, %hi(.LCPI42_0)
1340; CHECK-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
1341; CHECK-NEXT:    vfabs.v v24, v8, v0.t
1342; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1343; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
1344; CHECK-NEXT:    vmv1r.v v0, v16
1345; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
1346; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
1347; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
1348; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1349; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
1350; CHECK-NEXT:    ret
1351  %v = call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
1352  ret <vscale x 8 x double> %v
1353}
1354
; Unmasked vp.rint on <vscale x 8 x double>.
; Autogenerated checks (update_llc_test_checks.py) -- do not hand-edit.
1355define <vscale x 8 x double> @vp_rint_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
1356; CHECK-LABEL: vp_rint_nxv8f64_unmasked:
1357; CHECK:       # %bb.0:
1358; CHECK-NEXT:    lui a1, %hi(.LCPI43_0)
1359; CHECK-NEXT:    fld fa5, %lo(.LCPI43_0)(a1)
1360; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1361; CHECK-NEXT:    vfabs.v v16, v8
1362; CHECK-NEXT:    vmflt.vf v0, v16, fa5
1363; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
1364; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
1365; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1366; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
1367; CHECK-NEXT:    ret
1368  %v = call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
1369  ret <vscale x 8 x double> %v
1370}
1371
1372; Test splitting.
1373declare <vscale x 16 x double> @llvm.vp.rint.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)
1374
; Masked VP rint on <vscale x 16 x double>, which exceeds the largest legal
; vector (m8), so the operation is split into two m8 halves.
; Expected codegen highlights:
;  * The i1 mask is split by vslidedown.vx (shift by vlenb/8 = number of
;    mask bits per m8 double group) into v7 (low half) and v6 (high half).
;  * The EVL is split branchlessly: a2 = max(evl - vlenb, 0) for the high
;    half via the sltu/addi -1/and carry trick, then evl is clamped to vlenb
;    for the low half by the bltu/mv sequence at .LBB44_2.
;  * Register pressure forces an 8-register spill/reload of the vfabs result
;    through a vlenb-sized stack slot (see the .cfi_escape expressing
;    "sp + 16 + 8 * vlenb").
; Each half then follows the usual rint pattern: vfabs + vmflt against the
; constant-pool threshold, masked vfcvt.x.f.v / vfcvt.f.x.v round trip, and
; vfsgnj to restore signs.
1375define <vscale x 16 x double> @vp_rint_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmv1r.v v7, v0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    lui a2, %hi(.LCPI44_0)
; CHECK-NEXT:    srli a3, a1, 3
; CHECK-NEXT:    fld fa5, %lo(.LCPI44_0)(a2)
; CHECK-NEXT:    sub a2, a0, a1
; CHECK-NEXT:    vslidedown.vx v6, v0, a3
; CHECK-NEXT:    sltu a3, a0, a2
; CHECK-NEXT:    addi a3, a3, -1
; CHECK-NEXT:    and a2, a3, a2
; CHECK-NEXT:    vmv1r.v v0, v6
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16, v0.t
; CHECK-NEXT:    addi a2, sp, 16
; CHECK-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
; CHECK-NEXT:    vmv1r.v v0, v6
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB44_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB44_2:
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v7, v24, fa5, v0.t
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x double> @llvm.vp.rint.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}
1434
; Unmasked split case: VP rint on <vscale x 16 x double> with an all-true
; mask. Same two-m8-half split as the masked variant above, but with no
; incoming mask to slide down and no spill: the high half processes
; max(evl - vlenb, 0) elements (sltu/addi -1/and carry trick), then evl is
; clamped to vlenb for the low half at .LBB45_2. Each half does the standard
; rint pattern: vfabs + vmflt threshold mask, masked integer round trip
; (vfcvt.x.f.v / vfcvt.f.x.v), and vfsgnj to restore the sign bit.
1435define <vscale x 16 x double> @vp_rint_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    lui a2, %hi(.LCPI45_0)
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    fld fa5, %lo(.LCPI45_0)(a2)
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16
; CHECK-NEXT:    vmflt.vf v0, v24, fa5
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB45_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB45_2:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v8
; CHECK-NEXT:    vmflt.vf v0, v24, fa5
; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x double> @llvm.vp.rint.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x double> %v
}
1467