; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN

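; This file checks how the llvm.vp.roundtozero.* intrinsics (truncation
; toward zero under a mask and an explicit vector length) are lowered to RVV
; for fixed-length vectors. With +zvfh the f16 cases are handled natively;
; with +zvfhmin they are widened to f32, rounded there, and narrowed back.
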
declare <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half>, <2 x i1>, i32)

define <2 x half> @vp_roundtozero_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI0_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI0_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; ZVFH-NEXT:    fsrmi a0, 1
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_roundtozero_v2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v9, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v11, v10, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v11, fa5, v0.t
; ZVFHMIN-NEXT:    fsrmi a0, 1
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v11, v10, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v11, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}
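
; The generated code follows the usual round-to-integer idiom: vfabs.v and
; vmflt.vf build a mask of the lanes whose magnitude is below the first
; power of two at which every value of the type is already integral,
; fsrmi a0, 1 switches the dynamic rounding mode to round-toward-zero,
; vfcvt.x.f.v/vfcvt.f.x.v round-trip the selected lanes through the integer
; domain, and vfsgnj.vv restores the original sign so that -0.0 survives.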

define <2 x half> @vp_roundtozero_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v2f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI1_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI1_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 1
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_roundtozero_v2f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 1
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %v
}
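
; In the ZVFHMIN paths the f32 threshold is materialized inline: lui a0,
; 307200 puts 0x4B000000 (the single-precision encoding of 2^23) in a0 and
; fmv.w.x moves it into fa5. The ZVFH paths and the f64 cases below instead
; load their thresholds from the constant pool (.LCPIn_0); for f16 and f64
; these are presumably the analogous values 2^10 and 2^52.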

declare <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half>, <4 x i1>, i32)

define <4 x half> @vp_roundtozero_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI2_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI2_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; ZVFH-NEXT:    fsrmi a0, 1
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_roundtozero_v4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v9, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v11, v10, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v11, fa5, v0.t
; ZVFHMIN-NEXT:    fsrmi a0, 1
; ZVFHMIN-NEXT:    vmv.v.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v11, v10, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v11, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vp_roundtozero_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v4f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI3_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI3_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 1
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_roundtozero_v4f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 1
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x half> %v
}

declare <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half>, <8 x i1>, i32)

define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI4_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI4_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; ZVFH-NEXT:    fsrmi a0, 1
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_roundtozero_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v9, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
; ZVFHMIN-NEXT:    fsrmi a0, 1
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vp_roundtozero_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v8f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI5_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI5_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 1
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_roundtozero_v8f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v10
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 1
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %v = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x half> %v
}

declare <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half>, <16 x i1>, i32)

define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vmv1r.v v10, v0
; ZVFH-NEXT:    lui a0, %hi(.LCPI6_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a0)
; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
; ZVFH-NEXT:    fsrmi a0, 1
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_roundtozero_v16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v10, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
; ZVFHMIN-NEXT:    fsrmi a0, 1
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vp_roundtozero_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v16f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI7_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfabs.v v10, v8
; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
; ZVFH-NEXT:    fsrmi a0, 1
; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_roundtozero_v16f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v12
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 1
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %v = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x half> %v
}
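
; zvfh and zvfhmin only differ for f16; for the f32 and f64 cases below the
; two configurations produce identical code, so the checks are shared under
; the common CHECK prefix.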

declare <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float>, <2 x i1>, i32)

define <2 x float> @vp_roundtozero_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vp_roundtozero_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x float> %v
}

declare <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float>, <4 x i1>, i32)

define <4 x float> @vp_roundtozero_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vp_roundtozero_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x float> %v
}

declare <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float>, <8 x i1>, i32)

define <8 x float> @vp_roundtozero_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v0
; CHECK-NEXT:    vfabs.v v12, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vp_roundtozero_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v10, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %v
}

declare <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float>, <16 x i1>, i32)

define <16 x float> @vp_roundtozero_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vmv1r.v v12, v0
; CHECK-NEXT:    vfabs.v v16, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vp_roundtozero_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v12, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x float> %v
}

declare <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double>, <2 x i1>, i32)

define <2 x double> @vp_roundtozero_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI16_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI16_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vp_roundtozero_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI17_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI17_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x double> %v
}

declare <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double>, <4 x i1>, i32)

define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v0
; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
; CHECK-NEXT:    vfabs.v v12, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vp_roundtozero_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI19_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI19_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %v
}

declare <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double>, <8 x i1>, i32)

define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vmv1r.v v12, v0
; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
; CHECK-NEXT:    vfabs.v v16, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vp_roundtozero_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI21_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI21_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x double> %v
}

declare <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double>, <15 x i1>, i32)

define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v15f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    lui a0, %hi(.LCPI22_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
; CHECK-NEXT:    vfabs.v v24, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
  ret <15 x double> %v
}

define <15 x double> @vp_roundtozero_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v15f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x double> %v
}

declare <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double>, <16 x i1>, i32)

define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    lui a0, %hi(.LCPI24_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
; CHECK-NEXT:    vfabs.v v24, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vp_roundtozero_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI25_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}

declare <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double>, <32 x i1>, i32)

define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vmv1r.v v25, v0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    li a2, 16
; CHECK-NEXT:    vslidedown.vi v24, v0, 2
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    bltu a0, a2, .LBB26_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:  .LBB26_2:
; CHECK-NEXT:    vmv1r.v v0, v25
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8, v0.t
; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
; CHECK-NEXT:    addi a1, a0, -16
; CHECK-NEXT:    sltu a0, a0, a1
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    and a0, a0, a1
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
; CHECK-NEXT:    fsrmi a1, 1
; CHECK-NEXT:    vmv1r.v v0, v25
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a1
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vmv1r.v v0, v24
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v24, v8, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vmv1r.v v0, v24
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}
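
; <32 x double> does not fit in a single LMUL=8 register group, so the
; operation is split into two 16-element halves. The first half runs on
; min(evl, 16) elements; the branchless sltu/addi/and sequence computes
; max(evl - 16, 0) for the second half. In the masked version the second
; source group has to be spilled to the stack because v16-v23 are needed as
; temporaries for the first half.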

define <32 x double> @vp_roundtozero_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v32f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 16
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    bltu a0, a2, .LBB27_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:  .LBB27_2:
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v8
; CHECK-NEXT:    lui a2, %hi(.LCPI27_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a2)
; CHECK-NEXT:    addi a2, a0, -16
; CHECK-NEXT:    sltu a0, a0, a2
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    and a0, a0, a2
; CHECK-NEXT:    fsrmi a2, 1
; CHECK-NEXT:    vmflt.vf v0, v24, fa5
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16
; CHECK-NEXT:    vmflt.vf v7, v24, fa5
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT:    fsrm a2
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    fsrmi a1, 1
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    fsrm a1
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x double> %v
}
