xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll (revision b6c0f1bfa79a3a32d841ac5ab1f94c3aee3b5d90)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
3; RUN:   -verify-machineinstrs < %s | FileCheck %s
4; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
5; RUN:   -verify-machineinstrs < %s | FileCheck %s
6
7declare <2 x half> @llvm.vp.rint.v2f16(<2 x half>, <2 x i1>, i32)
8
; Masked llvm.vp.rint on <2 x half>: %m selects the active lanes and %evl is
; the explicit vector length (a0 is the AVL of the first vsetvli, e16/mf4).
; Expected code: |x| via vfabs.v, then vmflt.vf against a scalar threshold
; loaded from the constant pool. That compare runs under the mask-undisturbed
; (mu) policy with v0 as both mask and destination, so v0 ends up holding
; only lanes that are active in %m and below the threshold. Those lanes are
; converted to integer and back (vfcvt.x.f.v / vfcvt.f.x.v) and the sign of
; the original input is copied back with vfsgnj.vv; the final mu policy
; leaves every other lane of v8 untouched.
; NOTE(review): the value stored at .LCPI0_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
9define <2 x half> @vp_rint_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
10; CHECK-LABEL: vp_rint_v2f16:
11; CHECK:       # %bb.0:
12; CHECK-NEXT:    lui a1, %hi(.LCPI0_0)
13; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a1)
14; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
15; CHECK-NEXT:    vfabs.v v9, v8, v0.t
16; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
17; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
18; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
19; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
20; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
21; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
22; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
23; CHECK-NEXT:    ret
24  %v = call <2 x half> @llvm.vp.rint.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
25  ret <2 x half> %v
26}
27
; Unmasked llvm.vp.rint on <2 x half>: the mask operand is the constant
; splat (i1 true), so only %evl (a0) limits the active lanes.
; Expected code: vfabs.v and vmflt.vf run without v0.t, and the compare result
; in v0 is the only mask. Lanes below the threshold are converted to integer
; and back, then vfsgnj.vv (mask-undisturbed) restores the input sign while
; leaving the remaining lanes of v8 as they were.
; NOTE(review): the value stored at .LCPI1_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
28define <2 x half> @vp_rint_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
29; CHECK-LABEL: vp_rint_v2f16_unmasked:
30; CHECK:       # %bb.0:
31; CHECK-NEXT:    lui a1, %hi(.LCPI1_0)
32; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a1)
33; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
34; CHECK-NEXT:    vfabs.v v9, v8
35; CHECK-NEXT:    vmflt.vf v0, v9, fa5
36; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
37; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
38; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
39; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
40; CHECK-NEXT:    ret
41  %v = call <2 x half> @llvm.vp.rint.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
42  ret <2 x half> %v
43}
44
45declare <4 x half> @llvm.vp.rint.v4f16(<4 x half>, <4 x i1>, i32)
46
; Masked llvm.vp.rint on <4 x half> with mask %m and explicit vector length
; %evl (a0 is the AVL of the first vsetvli, e16/mf2).
; Expected code: |x| (vfabs.v) is compared against a constant-pool threshold
; (vmflt.vf, mask-undisturbed so v0 becomes "active in %m and below the
; threshold"); selected lanes go float -> int -> float, and vfsgnj.vv copies
; the input sign back while leaving unselected lanes of v8 unchanged.
; NOTE(review): the value stored at .LCPI2_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
47define <4 x half> @vp_rint_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
48; CHECK-LABEL: vp_rint_v4f16:
49; CHECK:       # %bb.0:
50; CHECK-NEXT:    lui a1, %hi(.LCPI2_0)
51; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a1)
52; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
53; CHECK-NEXT:    vfabs.v v9, v8, v0.t
54; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
55; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
56; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
57; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
58; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
59; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
60; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
61; CHECK-NEXT:    ret
62  %v = call <4 x half> @llvm.vp.rint.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
63  ret <4 x half> %v
64}
65
; Unmasked llvm.vp.rint on <4 x half>: the mask operand is splat (i1 true),
; so only %evl (a0) bounds the operation (e16/mf2).
; Expected code: unmasked vfabs.v + vmflt.vf produce the working mask in v0;
; the masked vfcvt.x.f.v / vfcvt.f.x.v pair rounds the selected lanes and
; vfsgnj.vv (mask-undisturbed) restores the input sign, leaving the other
; lanes of v8 as they were.
; NOTE(review): the value stored at .LCPI3_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
66define <4 x half> @vp_rint_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
67; CHECK-LABEL: vp_rint_v4f16_unmasked:
68; CHECK:       # %bb.0:
69; CHECK-NEXT:    lui a1, %hi(.LCPI3_0)
70; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a1)
71; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
72; CHECK-NEXT:    vfabs.v v9, v8
73; CHECK-NEXT:    vmflt.vf v0, v9, fa5
74; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
75; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
76; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
77; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
78; CHECK-NEXT:    ret
79  %v = call <4 x half> @llvm.vp.rint.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
80  ret <4 x half> %v
81}
82
83declare <8 x half> @llvm.vp.rint.v8f16(<8 x half>, <8 x i1>, i32)
84
; Masked llvm.vp.rint on <8 x half> with mask %m and explicit vector length
; %evl (a0 is the AVL of the first vsetvli, e16/m1).
; Expected code: vfabs.v, then a mask-undisturbed vmflt.vf against a
; constant-pool threshold that refines v0 in place, then the masked
; float -> int -> float round trip, and finally vfsgnj.vv to put the input
; sign back without disturbing the unselected lanes of v8.
; NOTE(review): the value stored at .LCPI4_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
85define <8 x half> @vp_rint_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
86; CHECK-LABEL: vp_rint_v8f16:
87; CHECK:       # %bb.0:
88; CHECK-NEXT:    lui a1, %hi(.LCPI4_0)
89; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a1)
90; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
91; CHECK-NEXT:    vfabs.v v9, v8, v0.t
92; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
93; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
94; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
95; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
96; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
97; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
98; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
99; CHECK-NEXT:    ret
100  %v = call <8 x half> @llvm.vp.rint.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
101  ret <8 x half> %v
102}
103
; Unmasked llvm.vp.rint on <8 x half>: the mask operand is splat (i1 true),
; so only %evl (a0) bounds the operation (e16/m1).
; Expected code: unmasked vfabs.v + vmflt.vf build the working mask in v0,
; the masked vfcvt pair rounds the selected lanes, and a mask-undisturbed
; vfsgnj.vv restores the input sign while keeping the other lanes of v8.
; NOTE(review): the value stored at .LCPI5_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
104define <8 x half> @vp_rint_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
105; CHECK-LABEL: vp_rint_v8f16_unmasked:
106; CHECK:       # %bb.0:
107; CHECK-NEXT:    lui a1, %hi(.LCPI5_0)
108; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a1)
109; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
110; CHECK-NEXT:    vfabs.v v9, v8
111; CHECK-NEXT:    vmflt.vf v0, v9, fa5
112; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
113; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
114; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
115; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
116; CHECK-NEXT:    ret
117  %v = call <8 x half> @llvm.vp.rint.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
118  ret <8 x half> %v
119}
120
121declare <16 x half> @llvm.vp.rint.v16f16(<16 x half>, <16 x i1>, i32)
122
; Masked llvm.vp.rint on <16 x half> with mask %m and explicit vector length
; %evl; the data occupies a two-register group (e16/m2, v8 and v12 groups).
; Expected code differs from the LMUL<=1 tests in how the mask is handled:
; v0 is first copied to v10, the mask-undisturbed vmflt.vf writes v10 (still
; predicated by the original v0), and the refined mask is moved back into v0
; before the masked float -> int -> float round trip and the final vfsgnj.vv.
; a0 is reused as the constant-pool address register once the first vsetvli
; has consumed %evl.
; NOTE(review): the value stored at .LCPI6_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
123define <16 x half> @vp_rint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
124; CHECK-LABEL: vp_rint_v16f16:
125; CHECK:       # %bb.0:
126; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
127; CHECK-NEXT:    vmv1r.v v10, v0
128; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
129; CHECK-NEXT:    flh fa5, %lo(.LCPI6_0)(a0)
130; CHECK-NEXT:    vfabs.v v12, v8, v0.t
131; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
132; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
133; CHECK-NEXT:    vmv1r.v v0, v10
134; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
135; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
136; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
137; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
138; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
139; CHECK-NEXT:    ret
140  %v = call <16 x half> @llvm.vp.rint.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
141  ret <16 x half> %v
142}
143
; Unmasked llvm.vp.rint on <16 x half>: the mask operand is splat (i1 true),
; so only %evl (a0) bounds the operation (e16/m2, temporaries in the v10
; group).
; Expected code: unmasked vfabs.v + vmflt.vf write the working mask straight
; into v0 (no mask copy is needed here), the masked vfcvt pair rounds the
; selected lanes, and a mask-undisturbed vfsgnj.vv restores the input sign.
; NOTE(review): the value stored at .LCPI7_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
144define <16 x half> @vp_rint_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
145; CHECK-LABEL: vp_rint_v16f16_unmasked:
146; CHECK:       # %bb.0:
147; CHECK-NEXT:    lui a1, %hi(.LCPI7_0)
148; CHECK-NEXT:    flh fa5, %lo(.LCPI7_0)(a1)
149; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
150; CHECK-NEXT:    vfabs.v v10, v8
151; CHECK-NEXT:    vmflt.vf v0, v10, fa5
152; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
153; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
154; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
155; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
156; CHECK-NEXT:    ret
157  %v = call <16 x half> @llvm.vp.rint.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
158  ret <16 x half> %v
159}
160
161declare <2 x float> @llvm.vp.rint.v2f32(<2 x float>, <2 x i1>, i32)
162
; Masked llvm.vp.rint on <2 x float> with mask %m and explicit vector length
; %evl (a0 is the AVL of the first vsetvli, e32/mf2).
; Unlike the half/double tests, no constant pool is used: the threshold is
; materialized with lui a0, 307200 + fmv.w.x. 307200 << 12 = 0x4B000000,
; which is the IEEE-754 single-precision bit pattern of 2^23.
; Expected code: vfabs.v, a mask-undisturbed vmflt.vf that refines v0 in
; place, the masked float -> int -> float round trip, and vfsgnj.vv to copy
; the input sign back while leaving unselected lanes of v8 unchanged.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
163define <2 x float> @vp_rint_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext %evl) {
164; CHECK-LABEL: vp_rint_v2f32:
165; CHECK:       # %bb.0:
166; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
167; CHECK-NEXT:    vfabs.v v9, v8, v0.t
168; CHECK-NEXT:    lui a0, 307200
169; CHECK-NEXT:    fmv.w.x fa5, a0
170; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
171; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
172; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
173; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
174; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
175; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
176; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
177; CHECK-NEXT:    ret
178  %v = call <2 x float> @llvm.vp.rint.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
179  ret <2 x float> %v
180}
181
; Unmasked llvm.vp.rint on <2 x float>: the mask operand is splat (i1 true),
; so only %evl (a0) bounds the operation (e32/mf2).
; The threshold is built in registers: lui a0, 307200 yields 0x4B000000, the
; single-precision encoding of 2^23, and fmv.w.x moves it into fa5.
; Expected code: unmasked vfabs.v + vmflt.vf build the working mask in v0,
; the masked vfcvt pair rounds the selected lanes, and a mask-undisturbed
; vfsgnj.vv restores the input sign while keeping the other lanes of v8.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
182define <2 x float> @vp_rint_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) {
183; CHECK-LABEL: vp_rint_v2f32_unmasked:
184; CHECK:       # %bb.0:
185; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
186; CHECK-NEXT:    vfabs.v v9, v8
187; CHECK-NEXT:    lui a0, 307200
188; CHECK-NEXT:    fmv.w.x fa5, a0
189; CHECK-NEXT:    vmflt.vf v0, v9, fa5
190; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
191; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
192; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
193; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
194; CHECK-NEXT:    ret
195  %v = call <2 x float> @llvm.vp.rint.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl)
196  ret <2 x float> %v
197}
198
199declare <4 x float> @llvm.vp.rint.v4f32(<4 x float>, <4 x i1>, i32)
200
; Masked llvm.vp.rint on <4 x float> with mask %m and explicit vector length
; %evl (a0 is the AVL of the first vsetvli, e32/m1).
; The threshold comes from lui a0, 307200 + fmv.w.x, i.e. 0x4B000000, the
; single-precision encoding of 2^23; a0 is free for this because %evl has
; already been consumed by the vsetvli.
; Expected code: vfabs.v, a mask-undisturbed vmflt.vf that refines v0 in
; place, the masked float -> int -> float round trip, and vfsgnj.vv to copy
; the input sign back while leaving unselected lanes of v8 unchanged.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
201define <4 x float> @vp_rint_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %evl) {
202; CHECK-LABEL: vp_rint_v4f32:
203; CHECK:       # %bb.0:
204; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
205; CHECK-NEXT:    vfabs.v v9, v8, v0.t
206; CHECK-NEXT:    lui a0, 307200
207; CHECK-NEXT:    fmv.w.x fa5, a0
208; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
209; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
210; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
211; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
212; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
213; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
214; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
215; CHECK-NEXT:    ret
216  %v = call <4 x float> @llvm.vp.rint.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
217  ret <4 x float> %v
218}
219
; Unmasked llvm.vp.rint on <4 x float>: the mask operand is splat (i1 true),
; so only %evl (a0) bounds the operation (e32/m1).
; The threshold is 0x4B000000 (lui a0, 307200), the single-precision encoding
; of 2^23, moved into fa5 with fmv.w.x.
; Expected code: unmasked vfabs.v + vmflt.vf build the working mask in v0,
; the masked vfcvt pair rounds the selected lanes, and a mask-undisturbed
; vfsgnj.vv restores the input sign while keeping the other lanes of v8.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
220define <4 x float> @vp_rint_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) {
221; CHECK-LABEL: vp_rint_v4f32_unmasked:
222; CHECK:       # %bb.0:
223; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
224; CHECK-NEXT:    vfabs.v v9, v8
225; CHECK-NEXT:    lui a0, 307200
226; CHECK-NEXT:    fmv.w.x fa5, a0
227; CHECK-NEXT:    vmflt.vf v0, v9, fa5
228; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
229; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
230; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
231; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
232; CHECK-NEXT:    ret
233  %v = call <4 x float> @llvm.vp.rint.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
234  ret <4 x float> %v
235}
236
237declare <8 x float> @llvm.vp.rint.v8f32(<8 x float>, <8 x i1>, i32)
238
; Masked llvm.vp.rint on <8 x float> with mask %m and explicit vector length
; %evl; the data occupies a two-register group (e32/m2, v8 and v12 groups).
; The threshold is 0x4B000000 (lui a0, 307200), the single-precision encoding
; of 2^23, moved into fa5 with fmv.w.x.
; Expected code: v0 is copied to v10, the mask-undisturbed vmflt.vf writes
; v10 while still predicated by the original v0, and the refined mask is
; moved back into v0 for the masked float -> int -> float round trip and the
; final sign-restoring vfsgnj.vv.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
239define <8 x float> @vp_rint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
240; CHECK-LABEL: vp_rint_v8f32:
241; CHECK:       # %bb.0:
242; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
243; CHECK-NEXT:    vmv1r.v v10, v0
244; CHECK-NEXT:    vfabs.v v12, v8, v0.t
245; CHECK-NEXT:    lui a0, 307200
246; CHECK-NEXT:    fmv.w.x fa5, a0
247; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
248; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
249; CHECK-NEXT:    vmv1r.v v0, v10
250; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
251; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
252; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
253; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
254; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
255; CHECK-NEXT:    ret
256  %v = call <8 x float> @llvm.vp.rint.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
257  ret <8 x float> %v
258}
259
; Unmasked llvm.vp.rint on <8 x float>: the mask operand is splat (i1 true),
; so only %evl (a0) bounds the operation (e32/m2, temporaries in the v10
; group).
; The threshold is 0x4B000000 (lui a0, 307200), the single-precision encoding
; of 2^23, moved into fa5 with fmv.w.x.
; Expected code: unmasked vfabs.v + vmflt.vf write the working mask into v0,
; the masked vfcvt pair rounds the selected lanes, and a mask-undisturbed
; vfsgnj.vv restores the input sign while keeping the other lanes of v8.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
260define <8 x float> @vp_rint_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) {
261; CHECK-LABEL: vp_rint_v8f32_unmasked:
262; CHECK:       # %bb.0:
263; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
264; CHECK-NEXT:    vfabs.v v10, v8
265; CHECK-NEXT:    lui a0, 307200
266; CHECK-NEXT:    fmv.w.x fa5, a0
267; CHECK-NEXT:    vmflt.vf v0, v10, fa5
268; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
269; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
270; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
271; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
272; CHECK-NEXT:    ret
273  %v = call <8 x float> @llvm.vp.rint.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl)
274  ret <8 x float> %v
275}
276
277declare <16 x float> @llvm.vp.rint.v16f32(<16 x float>, <16 x i1>, i32)
278
; Masked llvm.vp.rint on <16 x float> with mask %m and explicit vector length
; %evl; the data occupies a four-register group (e32/m4, v8 and v16 groups).
; The threshold is 0x4B000000 (lui a0, 307200), the single-precision encoding
; of 2^23, moved into fa5 with fmv.w.x.
; Expected code: v0 is copied to v12, the mask-undisturbed vmflt.vf writes
; v12 while still predicated by the original v0, and the refined mask is
; moved back into v0 for the masked float -> int -> float round trip and the
; final sign-restoring vfsgnj.vv.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
279define <16 x float> @vp_rint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
280; CHECK-LABEL: vp_rint_v16f32:
281; CHECK:       # %bb.0:
282; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
283; CHECK-NEXT:    vmv1r.v v12, v0
284; CHECK-NEXT:    vfabs.v v16, v8, v0.t
285; CHECK-NEXT:    lui a0, 307200
286; CHECK-NEXT:    fmv.w.x fa5, a0
287; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
288; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
289; CHECK-NEXT:    vmv1r.v v0, v12
290; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
291; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
292; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
293; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
294; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
295; CHECK-NEXT:    ret
296  %v = call <16 x float> @llvm.vp.rint.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
297  ret <16 x float> %v
298}
299
; Unmasked llvm.vp.rint on <16 x float>: the mask operand is splat (i1 true),
; so only %evl (a0) bounds the operation (e32/m4, temporaries in the v12
; group).
; The threshold is 0x4B000000 (lui a0, 307200), the single-precision encoding
; of 2^23, moved into fa5 with fmv.w.x.
; Expected code: unmasked vfabs.v + vmflt.vf write the working mask into v0,
; the masked vfcvt pair rounds the selected lanes, and a mask-undisturbed
; vfsgnj.vv restores the input sign while keeping the other lanes of v8.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
300define <16 x float> @vp_rint_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) {
301; CHECK-LABEL: vp_rint_v16f32_unmasked:
302; CHECK:       # %bb.0:
303; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
304; CHECK-NEXT:    vfabs.v v12, v8
305; CHECK-NEXT:    lui a0, 307200
306; CHECK-NEXT:    fmv.w.x fa5, a0
307; CHECK-NEXT:    vmflt.vf v0, v12, fa5
308; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
309; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
310; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
311; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
312; CHECK-NEXT:    ret
313  %v = call <16 x float> @llvm.vp.rint.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl)
314  ret <16 x float> %v
315}
316
317declare <2 x double> @llvm.vp.rint.v2f64(<2 x double>, <2 x i1>, i32)
318
; Masked llvm.vp.rint on <2 x double> with mask %m and explicit vector length
; %evl (a0 is the AVL of the first vsetvli, e64/m1).
; The threshold is loaded with fld from the constant pool. The label index
; jumps to .LCPI16_0 because the eight float tests before this one emit no
; constant-pool entries.
; Expected code: vfabs.v, a mask-undisturbed vmflt.vf that refines v0 in
; place, the masked float -> int -> float round trip, and vfsgnj.vv to copy
; the input sign back while leaving unselected lanes of v8 unchanged.
; NOTE(review): the value stored at .LCPI16_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
319define <2 x double> @vp_rint_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
320; CHECK-LABEL: vp_rint_v2f64:
321; CHECK:       # %bb.0:
322; CHECK-NEXT:    lui a1, %hi(.LCPI16_0)
323; CHECK-NEXT:    fld fa5, %lo(.LCPI16_0)(a1)
324; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
325; CHECK-NEXT:    vfabs.v v9, v8, v0.t
326; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
327; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
328; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
329; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
330; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
331; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
332; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
333; CHECK-NEXT:    ret
334  %v = call <2 x double> @llvm.vp.rint.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
335  ret <2 x double> %v
336}
337
; Unmasked llvm.vp.rint on <2 x double>: the mask operand is splat (i1 true),
; so only %evl (a0) bounds the operation (e64/m1).
; Expected code: the threshold is loaded with fld from the constant pool,
; unmasked vfabs.v + vmflt.vf build the working mask in v0, the masked vfcvt
; pair rounds the selected lanes, and a mask-undisturbed vfsgnj.vv restores
; the input sign while keeping the other lanes of v8.
; NOTE(review): the value stored at .LCPI17_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
338define <2 x double> @vp_rint_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
339; CHECK-LABEL: vp_rint_v2f64_unmasked:
340; CHECK:       # %bb.0:
341; CHECK-NEXT:    lui a1, %hi(.LCPI17_0)
342; CHECK-NEXT:    fld fa5, %lo(.LCPI17_0)(a1)
343; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
344; CHECK-NEXT:    vfabs.v v9, v8
345; CHECK-NEXT:    vmflt.vf v0, v9, fa5
346; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
347; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
348; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
349; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
350; CHECK-NEXT:    ret
351  %v = call <2 x double> @llvm.vp.rint.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl)
352  ret <2 x double> %v
353}
354
355declare <4 x double> @llvm.vp.rint.v4f64(<4 x double>, <4 x i1>, i32)
356
; Masked llvm.vp.rint on <4 x double> with mask %m and explicit vector length
; %evl; the data occupies a two-register group (e64/m2, v8 and v12 groups).
; Expected code: v0 is copied to v10, the threshold is loaded with fld from
; the constant pool (a0 is reused as the address register after the first
; vsetvli has consumed %evl), the mask-undisturbed vmflt.vf writes v10 while
; predicated by the original v0, and the refined mask is moved back into v0
; for the masked float -> int -> float round trip and the final vfsgnj.vv.
; NOTE(review): the value stored at .LCPI18_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
357define <4 x double> @vp_rint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
358; CHECK-LABEL: vp_rint_v4f64:
359; CHECK:       # %bb.0:
360; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
361; CHECK-NEXT:    vmv1r.v v10, v0
362; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
363; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
364; CHECK-NEXT:    vfabs.v v12, v8, v0.t
365; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
366; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
367; CHECK-NEXT:    vmv1r.v v0, v10
368; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
369; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
370; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
371; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
372; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
373; CHECK-NEXT:    ret
374  %v = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
375  ret <4 x double> %v
376}
377
; Unmasked llvm.vp.rint on <4 x double>: the mask operand is splat (i1 true),
; so only %evl (a0) bounds the operation (e64/m2, temporaries in the v10
; group).
; Expected code: the threshold is loaded with fld from the constant pool,
; unmasked vfabs.v + vmflt.vf write the working mask into v0, the masked
; vfcvt pair rounds the selected lanes, and a mask-undisturbed vfsgnj.vv
; restores the input sign while keeping the other lanes of v8.
; NOTE(review): the value stored at .LCPI19_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
378define <4 x double> @vp_rint_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
379; CHECK-LABEL: vp_rint_v4f64_unmasked:
380; CHECK:       # %bb.0:
381; CHECK-NEXT:    lui a1, %hi(.LCPI19_0)
382; CHECK-NEXT:    fld fa5, %lo(.LCPI19_0)(a1)
383; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
384; CHECK-NEXT:    vfabs.v v10, v8
385; CHECK-NEXT:    vmflt.vf v0, v10, fa5
386; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
387; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
388; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
389; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
390; CHECK-NEXT:    ret
391  %v = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl)
392  ret <4 x double> %v
393}
394
395declare <8 x double> @llvm.vp.rint.v8f64(<8 x double>, <8 x i1>, i32)
396
; Masked llvm.vp.rint on <8 x double> with mask %m and explicit vector length
; %evl; the data occupies a four-register group (e64/m4, v8 and v16 groups).
; Expected code: v0 is copied to v12, the threshold is loaded with fld from
; the constant pool (a0 is reused as the address register after the first
; vsetvli has consumed %evl), the mask-undisturbed vmflt.vf writes v12 while
; predicated by the original v0, and the refined mask is moved back into v0
; for the masked float -> int -> float round trip and the final vfsgnj.vv.
; NOTE(review): the value stored at .LCPI20_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
397define <8 x double> @vp_rint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
398; CHECK-LABEL: vp_rint_v8f64:
399; CHECK:       # %bb.0:
400; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
401; CHECK-NEXT:    vmv1r.v v12, v0
402; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
403; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
404; CHECK-NEXT:    vfabs.v v16, v8, v0.t
405; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
406; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
407; CHECK-NEXT:    vmv1r.v v0, v12
408; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
409; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
410; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
411; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
412; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
413; CHECK-NEXT:    ret
414  %v = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
415  ret <8 x double> %v
416}
417
; Unmasked llvm.vp.rint on <8 x double>: the mask operand is splat (i1 true),
; so only %evl (a0) bounds the operation (e64/m4, temporaries in the v12
; group).
; Expected code: the threshold is loaded with fld from the constant pool,
; unmasked vfabs.v + vmflt.vf write the working mask into v0, the masked
; vfcvt pair rounds the selected lanes, and a mask-undisturbed vfsgnj.vv
; restores the input sign while keeping the other lanes of v8.
; NOTE(review): the value stored at .LCPI21_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
418define <8 x double> @vp_rint_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
419; CHECK-LABEL: vp_rint_v8f64_unmasked:
420; CHECK:       # %bb.0:
421; CHECK-NEXT:    lui a1, %hi(.LCPI21_0)
422; CHECK-NEXT:    fld fa5, %lo(.LCPI21_0)(a1)
423; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
424; CHECK-NEXT:    vfabs.v v12, v8
425; CHECK-NEXT:    vmflt.vf v0, v12, fa5
426; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
427; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
428; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
429; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
430; CHECK-NEXT:    ret
431  %v = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl)
432  ret <8 x double> %v
433}
434
435declare <15 x double> @llvm.vp.rint.v15f64(<15 x double>, <15 x i1>, i32)
436
; Masked llvm.vp.rint on <15 x double>, a non-power-of-two element count,
; with mask %m and explicit vector length %evl. The expected code uses an
; eight-register group (e64/m8, v8 and v24 groups) and %evl (a0) as the AVL.
; Expected code: v0 is copied to v16, the threshold is loaded with fld from
; the constant pool, the mask-undisturbed vmflt.vf writes v16 while
; predicated by the original v0, and the refined mask is moved back into v0
; for the masked float -> int -> float round trip and the final vfsgnj.vv.
; NOTE(review): the value stored at .LCPI22_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
437define <15 x double> @vp_rint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
438; CHECK-LABEL: vp_rint_v15f64:
439; CHECK:       # %bb.0:
440; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
441; CHECK-NEXT:    vmv1r.v v16, v0
442; CHECK-NEXT:    lui a0, %hi(.LCPI22_0)
443; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
444; CHECK-NEXT:    vfabs.v v24, v8, v0.t
445; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
446; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
447; CHECK-NEXT:    vmv1r.v v0, v16
448; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
449; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
450; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
451; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
452; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
453; CHECK-NEXT:    ret
454  %v = call <15 x double> @llvm.vp.rint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
455  ret <15 x double> %v
456}
457
; Unmasked llvm.vp.rint on <15 x double> (non-power-of-two element count):
; the mask operand is splat (i1 true), so only %evl (a0) bounds the
; operation (e64/m8, temporaries in the v16 group).
; Expected code: the threshold is loaded with fld from the constant pool,
; unmasked vfabs.v + vmflt.vf write the working mask into v0, the masked
; vfcvt pair rounds the selected lanes, and a mask-undisturbed vfsgnj.vv
; restores the input sign while keeping the other lanes of v8.
; NOTE(review): the value stored at .LCPI23_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
458define <15 x double> @vp_rint_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
459; CHECK-LABEL: vp_rint_v15f64_unmasked:
460; CHECK:       # %bb.0:
461; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
462; CHECK-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
463; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
464; CHECK-NEXT:    vfabs.v v16, v8
465; CHECK-NEXT:    vmflt.vf v0, v16, fa5
466; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
467; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
468; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
469; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
470; CHECK-NEXT:    ret
471  %v = call <15 x double> @llvm.vp.rint.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl)
472  ret <15 x double> %v
473}
474
475declare <16 x double> @llvm.vp.rint.v16f64(<16 x double>, <16 x i1>, i32)
476
; Masked llvm.vp.rint on <16 x double> with mask %m and explicit vector
; length %evl; the data occupies an eight-register group (e64/m8, v8 and v24
; groups), and the whole vector is handled in one pass.
; Expected code: v0 is copied to v16, the threshold is loaded with fld from
; the constant pool, the mask-undisturbed vmflt.vf writes v16 while
; predicated by the original v0, and the refined mask is moved back into v0
; for the masked float -> int -> float round trip and the final vfsgnj.vv.
; NOTE(review): the value stored at .LCPI24_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
477define <16 x double> @vp_rint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
478; CHECK-LABEL: vp_rint_v16f64:
479; CHECK:       # %bb.0:
480; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
481; CHECK-NEXT:    vmv1r.v v16, v0
482; CHECK-NEXT:    lui a0, %hi(.LCPI24_0)
483; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
484; CHECK-NEXT:    vfabs.v v24, v8, v0.t
485; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
486; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
487; CHECK-NEXT:    vmv1r.v v0, v16
488; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
489; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
490; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
491; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
492; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
493; CHECK-NEXT:    ret
494  %v = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
495  ret <16 x double> %v
496}
497
; Unmasked llvm.vp.rint on <16 x double>: the mask operand is
; splat (i1 true), so only %evl (a0) bounds the operation (e64/m8,
; temporaries in the v16 group).
; Expected code: the threshold is loaded with fld from the constant pool,
; unmasked vfabs.v + vmflt.vf write the working mask into v0, the masked
; vfcvt pair rounds the selected lanes, and a mask-undisturbed vfsgnj.vv
; restores the input sign while keeping the other lanes of v8.
; NOTE(review): the value stored at .LCPI25_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
498define <16 x double> @vp_rint_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
499; CHECK-LABEL: vp_rint_v16f64_unmasked:
500; CHECK:       # %bb.0:
501; CHECK-NEXT:    lui a1, %hi(.LCPI25_0)
502; CHECK-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
503; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
504; CHECK-NEXT:    vfabs.v v16, v8
505; CHECK-NEXT:    vmflt.vf v0, v16, fa5
506; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
507; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
508; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
509; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
510; CHECK-NEXT:    ret
511  %v = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl)
512  ret <16 x double> %v
513}
514
515declare <32 x double> @llvm.vp.rint.v32f64(<32 x double>, <32 x i1>, i32)
516
; Masked llvm.vp.rint on <32 x double> with mask %m and explicit vector
; length %evl. The expected code processes the vector as two 16-lane halves,
; each an e64/m8 register group: the low half in v8, the high half in v16.
;
; What the CHECK lines show:
;  * Mask split: v0 is saved in v6, and vslidedown.vi v7, v0, 2 at e8 moves
;    the mask down by two bytes (16 bits), so v7 holds the high-half mask.
;  * Low-half length: a1 = %evl if %evl < 16, otherwise 16 (li/mv/bltu/li).
;  * High-half length: addi a1, a0, -16 / sltu / addi -1 / and leaves
;    a0 = %evl - 16 when %evl >= 16 and 0 otherwise (a branch-free
;    saturating subtract).
;  * Stack frame: 16 bytes plus 8 * vlenb are reserved; the vfabs.v result in
;    v24 is stored with vs8r.v and reloaded with vl8r.v before the compare.
;    NOTE(review): the spill and reload target the same register group with
;    no intervening write to v24 in the checked sequence, so the round trip
;    looks redundant; it is recorded here only because it is what the
;    compiler emitted when the checks were generated.
;  * Each half then follows the same pattern as the smaller tests: a
;    mask-undisturbed vmflt.vf against the constant-pool threshold refines
;    the half's mask (v6 or v7), the mask is moved into v0, the masked
;    vfcvt.x.f.v / vfcvt.f.x.v pair rounds the selected lanes, and vfsgnj.vv
;    restores the input sign.
; NOTE(review): the value stored at .LCPI26_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
517define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
518; CHECK-LABEL: vp_rint_v32f64:
519; CHECK:       # %bb.0:
520; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
521; CHECK-NEXT:    vmv1r.v v6, v0
522; CHECK-NEXT:    li a2, 16
523; CHECK-NEXT:    vslidedown.vi v7, v0, 2
524; CHECK-NEXT:    mv a1, a0
525; CHECK-NEXT:    bltu a0, a2, .LBB26_2
526; CHECK-NEXT:  # %bb.1:
527; CHECK-NEXT:    li a1, 16
528; CHECK-NEXT:  .LBB26_2:
529; CHECK-NEXT:    addi sp, sp, -16
530; CHECK-NEXT:    .cfi_def_cfa_offset 16
531; CHECK-NEXT:    csrr a2, vlenb
532; CHECK-NEXT:    slli a2, a2, 3
533; CHECK-NEXT:    sub sp, sp, a2
534; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
535; CHECK-NEXT:    vmv1r.v v0, v6
536; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
537; CHECK-NEXT:    vfabs.v v24, v8, v0.t
538; CHECK-NEXT:    addi a1, sp, 16
539; CHECK-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
540; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
541; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
542; CHECK-NEXT:    addi a1, a0, -16
543; CHECK-NEXT:    sltu a0, a0, a1
544; CHECK-NEXT:    addi a0, a0, -1
545; CHECK-NEXT:    addi a2, sp, 16
546; CHECK-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
547; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
548; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
549; CHECK-NEXT:    and a0, a0, a1
550; CHECK-NEXT:    vmv1r.v v0, v6
551; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
552; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
553; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
554; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
555; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
556; CHECK-NEXT:    vmv1r.v v0, v7
557; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
558; CHECK-NEXT:    vfabs.v v24, v16, v0.t
559; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
560; CHECK-NEXT:    vmflt.vf v7, v24, fa5, v0.t
561; CHECK-NEXT:    vmv1r.v v0, v7
562; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
563; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
564; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
565; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
566; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
567; CHECK-NEXT:    csrr a0, vlenb
568; CHECK-NEXT:    slli a0, a0, 3
569; CHECK-NEXT:    add sp, sp, a0
570; CHECK-NEXT:    .cfi_def_cfa sp, 16
571; CHECK-NEXT:    addi sp, sp, 16
572; CHECK-NEXT:    .cfi_def_cfa_offset 0
573; CHECK-NEXT:    ret
574  %v = call <32 x double> @llvm.vp.rint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
575  ret <32 x double> %v
576}
577
; Unmasked llvm.vp.rint on <32 x double>: the mask operand is
; splat (i1 true). The expected code processes the vector as two 16-lane
; halves, each an e64/m8 register group: the low half in v8, the high half
; in v16. No stack frame is set up in this variant.
;
; What the CHECK lines show:
;  * Low-half length: a1 = %evl if %evl < 16, otherwise 16 (li/mv/bltu/li).
;  * High-half length: addi a2, a0, -16 / sltu / addi -1 / and leaves
;    a0 = %evl - 16 when %evl >= 16 and 0 otherwise (a branch-free
;    saturating subtract).
;  * Both compares are issued first: vfabs.v + vmflt.vf for the low half
;    write v0, then (after switching the AVL to a0) the same pair for the
;    high half writes v7.
;  * The rounding then runs per half, switching the AVL back to a1 and again
;    to a0: the masked vfcvt.x.f.v / vfcvt.f.x.v pair rounds the selected
;    lanes and a mask-undisturbed vfsgnj.vv restores the input sign. v7 is
;    moved into v0 before the high half.
; NOTE(review): the value stored at .LCPI27_0 is not visible in this file.
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
578define <32 x double> @vp_rint_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
579; CHECK-LABEL: vp_rint_v32f64_unmasked:
580; CHECK:       # %bb.0:
581; CHECK-NEXT:    li a2, 16
582; CHECK-NEXT:    mv a1, a0
583; CHECK-NEXT:    bltu a0, a2, .LBB27_2
584; CHECK-NEXT:  # %bb.1:
585; CHECK-NEXT:    li a1, 16
586; CHECK-NEXT:  .LBB27_2:
587; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
588; CHECK-NEXT:    vfabs.v v24, v8
589; CHECK-NEXT:    lui a2, %hi(.LCPI27_0)
590; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a2)
591; CHECK-NEXT:    addi a2, a0, -16
592; CHECK-NEXT:    sltu a0, a0, a2
593; CHECK-NEXT:    addi a0, a0, -1
594; CHECK-NEXT:    and a0, a0, a2
595; CHECK-NEXT:    vmflt.vf v0, v24, fa5
596; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
597; CHECK-NEXT:    vfabs.v v24, v16
598; CHECK-NEXT:    vmflt.vf v7, v24, fa5
599; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
600; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
601; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
602; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
603; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
604; CHECK-NEXT:    vmv1r.v v0, v7
605; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
606; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
607; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
608; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
609; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
610; CHECK-NEXT:    ret
611  %v = call <32 x double> @llvm.vp.rint.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl)
612  ret <32 x double> %v
613}
614