; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN

; This file tests the code generation for `llvm.round.*` on scalable vector type.

define <vscale x 1 x bfloat> @round_nxv1bf16(<vscale x 1 x bfloat> %x) {
; CHECK-LABEL: round_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v8, v9
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x bfloat> @llvm.round.nxv1bf16(<vscale x 1 x bfloat> %x)
  ret <vscale x 1 x bfloat> %a
}

define <vscale x 2 x bfloat> @round_nxv2bf16(<vscale x 2 x bfloat> %x) {
; CHECK-LABEL: round_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v8, v9
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x bfloat> @llvm.round.nxv2bf16(<vscale x 2 x bfloat> %x)
  ret <vscale x 2 x bfloat> %a
}

define <vscale x 4 x bfloat> @round_nxv4bf16(<vscale x 4 x bfloat> %x) {
; CHECK-LABEL: round_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v8, v10
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x bfloat> @llvm.round.nxv4bf16(<vscale x 4 x bfloat> %x)
  ret <vscale x 4 x bfloat> %a
}

define <vscale x 8 x bfloat> @round_nxv8bf16(<vscale x 8 x bfloat> %x) {
; CHECK-LABEL: round_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v8, v12
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x bfloat> @llvm.round.nxv8bf16(<vscale x 8 x bfloat> %x)
  ret <vscale x 8 x bfloat> %a
}

define <vscale x 16 x bfloat> @round_nxv16bf16(<vscale x 16 x bfloat> %x) {
; CHECK-LABEL: round_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v16
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x bfloat> @llvm.round.nxv16bf16(<vscale x 16 x bfloat> %x)
  ret <vscale x 16 x bfloat> %a
}

define <vscale x 32 x bfloat> @round_nxv32bf16(<vscale x 32 x bfloat> %x) {
; CHECK-LABEL: round_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v24, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v24
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24
; CHECK-NEXT:    ret
  %a = call <vscale x 32 x bfloat> @llvm.round.nxv32bf16(<vscale x 32 x bfloat> %x)
  ret <vscale x 32 x bfloat> %a
}

define <vscale x 1 x half> @round_nxv1f16(<vscale x 1 x half> %x) {
; ZVFH-LABEL: round_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI6_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 4
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: round_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 4
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half> %x)
  ret <vscale x 1 x half> %a
}
declare <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half>)

define <vscale x 2 x half> @round_nxv2f16(<vscale x 2 x half> %x) {
; ZVFH-LABEL: round_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI7_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 4
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: round_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 4
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half> %x)
  ret <vscale x 2 x half> %a
}
declare <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half>)

define <vscale x 4 x half> @round_nxv4f16(<vscale x 4 x half> %x) {
; ZVFH-LABEL: round_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI8_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI8_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 4
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: round_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v10
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 4
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half> %x)
  ret <vscale x 4 x half> %a
}
declare <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half>)

define <vscale x 8 x half> @round_nxv8f16(<vscale x 8 x half> %x) {
; ZVFH-LABEL: round_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI9_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI9_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfabs.v v10, v8
; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
; ZVFH-NEXT:    fsrmi a0, 4
; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: round_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v12
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 4
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half> %x)
  ret <vscale x 8 x half> %a
}
declare <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half>)

define <vscale x 16 x half> @round_nxv16f16(<vscale x 16 x half> %x) {
; ZVFH-LABEL: round_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI10_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI10_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfabs.v v12, v8
; ZVFH-NEXT:    vmflt.vf v0, v12, fa5
; ZVFH-NEXT:    fsrmi a0, 4
; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: round_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v16
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 4
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half> %x)
  ret <vscale x 16 x half> %a
}
declare <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half>)

define <vscale x 32 x half> @round_nxv32f16(<vscale x 32 x half> %x) {
; ZVFH-LABEL: round_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI11_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI11_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vfabs.v v16, v8
; ZVFH-NEXT:    vmflt.vf v0, v16, fa5
; ZVFH-NEXT:    fsrmi a0, 4
; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: round_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v24, v16
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 4
; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v24
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    fsrmi a0, 4
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v24, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 32 x half> @llvm.round.nxv32f16(<vscale x 32 x half> %x)
  ret <vscale x 32 x half> %a
}
declare <vscale x 32 x half> @llvm.round.nxv32f16(<vscale x 32 x half>)

define <vscale x 1 x float> @round_nxv1f32(<vscale x 1 x float> %x) {
; CHECK-LABEL: round_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.round.nxv1f32(<vscale x 1 x float> %x)
  ret <vscale x 1 x float> %a
}
declare <vscale x 1 x float> @llvm.round.nxv1f32(<vscale x 1 x float>)

define <vscale x 2 x float> @round_nxv2f32(<vscale x 2 x float> %x) {
; CHECK-LABEL: round_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x float> @llvm.round.nxv2f32(<vscale x 2 x float> %x)
  ret <vscale x 2 x float> %a
}
declare <vscale x 2 x float> @llvm.round.nxv2f32(<vscale x 2 x float>)

define <vscale x 4 x float> @round_nxv4f32(<vscale x 4 x float> %x) {
; CHECK-LABEL: round_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v10, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> %x)
  ret <vscale x 4 x float> %a
}
declare <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float>)

define <vscale x 8 x float> @round_nxv8f32(<vscale x 8 x float> %x) {
; CHECK-LABEL: round_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v12, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x float> @llvm.round.nxv8f32(<vscale x 8 x float> %x)
  ret <vscale x 8 x float> %a
}
declare <vscale x 8 x float> @llvm.round.nxv8f32(<vscale x 8 x float>)

define <vscale x 16 x float> @round_nxv16f32(<vscale x 16 x float> %x) {
; CHECK-LABEL: round_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x float> @llvm.round.nxv16f32(<vscale x 16 x float> %x)
  ret <vscale x 16 x float> %a
}
declare <vscale x 16 x float> @llvm.round.nxv16f32(<vscale x 16 x float>)

define <vscale x 1 x double> @round_nxv1f64(<vscale x 1 x double> %x) {
; CHECK-LABEL: round_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI17_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI17_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double> %x)
  ret <vscale x 1 x double> %a
}
declare <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double>)

define <vscale x 2 x double> @round_nxv2f64(<vscale x 2 x double> %x) {
; CHECK-LABEL: round_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> %x)
  ret <vscale x 2 x double> %a
}
declare <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double>)

define <vscale x 4 x double> @round_nxv4f64(<vscale x 4 x double> %x) {
; CHECK-LABEL: round_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI19_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI19_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double> %x)
  ret <vscale x 4 x double> %a
}
declare <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double>)

define <vscale x 8 x double> @round_nxv8f64(<vscale x 8 x double> %x) {
; CHECK-LABEL: round_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x double> @llvm.round.nxv8f64(<vscale x 8 x double> %x)
  ret <vscale x 8 x double> %a
}
declare <vscale x 8 x double> @llvm.round.nxv8f64(<vscale x 8 x double>)
