xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll (revision 8ce81f17a16b8b689895c7c093d0401a75c09882)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN

declare <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)

17define <vscale x 1 x bfloat> @vp_ceil_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
18; CHECK-LABEL: vp_ceil_vv_nxv1bf16:
19; CHECK:       # %bb.0:
20; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
21; CHECK-NEXT:    vmv1r.v v9, v0
22; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
23; CHECK-NEXT:    lui a0, 307200
24; CHECK-NEXT:    vmv1r.v v8, v0
25; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
26; CHECK-NEXT:    vfabs.v v11, v10, v0.t
27; CHECK-NEXT:    fmv.w.x fa5, a0
28; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
29; CHECK-NEXT:    vmflt.vf v8, v11, fa5, v0.t
30; CHECK-NEXT:    fsrmi a0, 3
31; CHECK-NEXT:    vmv1r.v v0, v8
32; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
33; CHECK-NEXT:    vfcvt.x.f.v v11, v10, v0.t
34; CHECK-NEXT:    fsrm a0
35; CHECK-NEXT:    vfcvt.f.x.v v11, v11, v0.t
36; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
37; CHECK-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
38; CHECK-NEXT:    vmv1r.v v0, v9
39; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
40; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
41; CHECK-NEXT:    ret
42  %v = call <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
43  ret <vscale x 1 x bfloat> %v
44}
45
46define <vscale x 1 x bfloat> @vp_ceil_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
47; CHECK-LABEL: vp_ceil_vv_nxv1bf16_unmasked:
48; CHECK:       # %bb.0:
49; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
50; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
51; CHECK-NEXT:    lui a0, 307200
52; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
53; CHECK-NEXT:    vfabs.v v8, v9
54; CHECK-NEXT:    fmv.w.x fa5, a0
55; CHECK-NEXT:    vmflt.vf v0, v8, fa5
56; CHECK-NEXT:    fsrmi a0, 3
57; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
58; CHECK-NEXT:    fsrm a0
59; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
60; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
61; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
62; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
63; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
64; CHECK-NEXT:    ret
65  %v = call <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
66  ret <vscale x 1 x bfloat> %v
67}

declare <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)

71define <vscale x 2 x bfloat> @vp_ceil_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
72; CHECK-LABEL: vp_ceil_vv_nxv2bf16:
73; CHECK:       # %bb.0:
74; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
75; CHECK-NEXT:    vmv1r.v v9, v0
76; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
77; CHECK-NEXT:    lui a0, 307200
78; CHECK-NEXT:    vmv1r.v v8, v0
79; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
80; CHECK-NEXT:    vfabs.v v11, v10, v0.t
81; CHECK-NEXT:    fmv.w.x fa5, a0
82; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
83; CHECK-NEXT:    vmflt.vf v8, v11, fa5, v0.t
84; CHECK-NEXT:    fsrmi a0, 3
85; CHECK-NEXT:    vmv.v.v v0, v8
86; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
87; CHECK-NEXT:    vfcvt.x.f.v v11, v10, v0.t
88; CHECK-NEXT:    fsrm a0
89; CHECK-NEXT:    vfcvt.f.x.v v11, v11, v0.t
90; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
91; CHECK-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
92; CHECK-NEXT:    vmv1r.v v0, v9
93; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
94; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
95; CHECK-NEXT:    ret
96  %v = call <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
97  ret <vscale x 2 x bfloat> %v
98}
99
100define <vscale x 2 x bfloat> @vp_ceil_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
101; CHECK-LABEL: vp_ceil_vv_nxv2bf16_unmasked:
102; CHECK:       # %bb.0:
103; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
104; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
105; CHECK-NEXT:    lui a0, 307200
106; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
107; CHECK-NEXT:    vfabs.v v8, v9
108; CHECK-NEXT:    fmv.w.x fa5, a0
109; CHECK-NEXT:    vmflt.vf v0, v8, fa5
110; CHECK-NEXT:    fsrmi a0, 3
111; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
112; CHECK-NEXT:    fsrm a0
113; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
114; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
115; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
116; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
117; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
118; CHECK-NEXT:    ret
119  %v = call <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
120  ret <vscale x 2 x bfloat> %v
121}

declare <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)

125define <vscale x 4 x bfloat> @vp_ceil_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
126; CHECK-LABEL: vp_ceil_vv_nxv4bf16:
127; CHECK:       # %bb.0:
128; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
129; CHECK-NEXT:    vmv1r.v v9, v0
130; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
131; CHECK-NEXT:    lui a0, 307200
132; CHECK-NEXT:    vmv1r.v v8, v0
133; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
134; CHECK-NEXT:    vfabs.v v12, v10, v0.t
135; CHECK-NEXT:    fmv.w.x fa5, a0
136; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
137; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
138; CHECK-NEXT:    fsrmi a0, 3
139; CHECK-NEXT:    vmv1r.v v0, v8
140; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
141; CHECK-NEXT:    vfcvt.x.f.v v12, v10, v0.t
142; CHECK-NEXT:    fsrm a0
143; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
144; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
145; CHECK-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
146; CHECK-NEXT:    vmv1r.v v0, v9
147; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
148; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
149; CHECK-NEXT:    ret
150  %v = call <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
151  ret <vscale x 4 x bfloat> %v
152}
153
154define <vscale x 4 x bfloat> @vp_ceil_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
155; CHECK-LABEL: vp_ceil_vv_nxv4bf16_unmasked:
156; CHECK:       # %bb.0:
157; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
158; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
159; CHECK-NEXT:    lui a0, 307200
160; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
161; CHECK-NEXT:    vfabs.v v8, v10
162; CHECK-NEXT:    fmv.w.x fa5, a0
163; CHECK-NEXT:    vmflt.vf v0, v8, fa5
164; CHECK-NEXT:    fsrmi a0, 3
165; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
166; CHECK-NEXT:    fsrm a0
167; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
168; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
169; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
170; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
171; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
172; CHECK-NEXT:    ret
173  %v = call <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
174  ret <vscale x 4 x bfloat> %v
175}

declare <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)

179define <vscale x 8 x bfloat> @vp_ceil_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
180; CHECK-LABEL: vp_ceil_vv_nxv8bf16:
181; CHECK:       # %bb.0:
182; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
183; CHECK-NEXT:    vmv1r.v v10, v0
184; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
185; CHECK-NEXT:    lui a0, 307200
186; CHECK-NEXT:    vmv1r.v v8, v0
187; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
188; CHECK-NEXT:    vfabs.v v16, v12, v0.t
189; CHECK-NEXT:    fmv.w.x fa5, a0
190; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
191; CHECK-NEXT:    vmflt.vf v8, v16, fa5, v0.t
192; CHECK-NEXT:    fsrmi a0, 3
193; CHECK-NEXT:    vmv1r.v v0, v8
194; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
195; CHECK-NEXT:    vfcvt.x.f.v v16, v12, v0.t
196; CHECK-NEXT:    fsrm a0
197; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
198; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
199; CHECK-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
200; CHECK-NEXT:    vmv1r.v v0, v10
201; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
202; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
203; CHECK-NEXT:    ret
204  %v = call <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
205  ret <vscale x 8 x bfloat> %v
206}
207
208define <vscale x 8 x bfloat> @vp_ceil_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
209; CHECK-LABEL: vp_ceil_vv_nxv8bf16_unmasked:
210; CHECK:       # %bb.0:
211; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
212; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
213; CHECK-NEXT:    lui a0, 307200
214; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
215; CHECK-NEXT:    vfabs.v v8, v12
216; CHECK-NEXT:    fmv.w.x fa5, a0
217; CHECK-NEXT:    vmflt.vf v0, v8, fa5
218; CHECK-NEXT:    fsrmi a0, 3
219; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
220; CHECK-NEXT:    fsrm a0
221; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
222; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
223; CHECK-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
224; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
225; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
226; CHECK-NEXT:    ret
227  %v = call <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
228  ret <vscale x 8 x bfloat> %v
229}

declare <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)

233define <vscale x 16 x bfloat> @vp_ceil_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
234; CHECK-LABEL: vp_ceil_vv_nxv16bf16:
235; CHECK:       # %bb.0:
236; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
237; CHECK-NEXT:    vmv1r.v v12, v0
238; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
239; CHECK-NEXT:    lui a0, 307200
240; CHECK-NEXT:    vmv1r.v v8, v0
241; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
242; CHECK-NEXT:    vfabs.v v24, v16, v0.t
243; CHECK-NEXT:    fmv.w.x fa5, a0
244; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
245; CHECK-NEXT:    vmflt.vf v8, v24, fa5, v0.t
246; CHECK-NEXT:    fsrmi a0, 3
247; CHECK-NEXT:    vmv1r.v v0, v8
248; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
249; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
250; CHECK-NEXT:    fsrm a0
251; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
252; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
253; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
254; CHECK-NEXT:    vmv1r.v v0, v12
255; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
256; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
257; CHECK-NEXT:    ret
258  %v = call <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
259  ret <vscale x 16 x bfloat> %v
260}
261
262define <vscale x 16 x bfloat> @vp_ceil_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
263; CHECK-LABEL: vp_ceil_vv_nxv16bf16_unmasked:
264; CHECK:       # %bb.0:
265; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
266; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
267; CHECK-NEXT:    lui a0, 307200
268; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
269; CHECK-NEXT:    vfabs.v v8, v16
270; CHECK-NEXT:    fmv.w.x fa5, a0
271; CHECK-NEXT:    vmflt.vf v0, v8, fa5
272; CHECK-NEXT:    fsrmi a0, 3
273; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
274; CHECK-NEXT:    fsrm a0
275; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
276; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
277; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
278; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
279; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
280; CHECK-NEXT:    ret
281  %v = call <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
282  ret <vscale x 16 x bfloat> %v
283}

declare <vscale x 32 x bfloat> @llvm.vp.ceil.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)

287define <vscale x 32 x bfloat> @vp_ceil_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
288; CHECK-LABEL: vp_ceil_vv_nxv32bf16:
289; CHECK:       # %bb.0:
290; CHECK-NEXT:    addi sp, sp, -16
291; CHECK-NEXT:    .cfi_def_cfa_offset 16
292; CHECK-NEXT:    csrr a1, vlenb
293; CHECK-NEXT:    slli a1, a1, 3
294; CHECK-NEXT:    sub sp, sp, a1
295; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
296; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
297; CHECK-NEXT:    vmv1r.v v7, v0
298; CHECK-NEXT:    csrr a2, vlenb
299; CHECK-NEXT:    lui a3, 307200
300; CHECK-NEXT:    slli a1, a2, 1
301; CHECK-NEXT:    srli a2, a2, 2
302; CHECK-NEXT:    fmv.w.x fa5, a3
303; CHECK-NEXT:    sub a3, a0, a1
304; CHECK-NEXT:    vslidedown.vx v17, v0, a2
305; CHECK-NEXT:    sltu a2, a0, a3
306; CHECK-NEXT:    vmv1r.v v18, v17
307; CHECK-NEXT:    addi a2, a2, -1
308; CHECK-NEXT:    and a2, a2, a3
309; CHECK-NEXT:    vmv1r.v v0, v17
310; CHECK-NEXT:    addi a3, sp, 16
311; CHECK-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
312; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
313; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
314; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
315; CHECK-NEXT:    vfabs.v v8, v24, v0.t
316; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
317; CHECK-NEXT:    vmflt.vf v18, v8, fa5, v0.t
318; CHECK-NEXT:    fsrmi a2, 3
319; CHECK-NEXT:    vmv1r.v v0, v18
320; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
321; CHECK-NEXT:    vfcvt.x.f.v v8, v24, v0.t
322; CHECK-NEXT:    fsrm a2
323; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
324; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
325; CHECK-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
326; CHECK-NEXT:    vmv1r.v v0, v17
327; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
328; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
329; CHECK-NEXT:    bltu a0, a1, .LBB10_2
330; CHECK-NEXT:  # %bb.1:
331; CHECK-NEXT:    mv a0, a1
332; CHECK-NEXT:  .LBB10_2:
333; CHECK-NEXT:    vmv1r.v v0, v7
334; CHECK-NEXT:    addi a1, sp, 16
335; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
336; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
337; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16, v0.t
338; CHECK-NEXT:    vmv1r.v v8, v7
339; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
340; CHECK-NEXT:    vfabs.v v16, v24, v0.t
341; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
342; CHECK-NEXT:    vmflt.vf v8, v16, fa5, v0.t
343; CHECK-NEXT:    fsrmi a0, 3
344; CHECK-NEXT:    vmv1r.v v0, v8
345; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
346; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
347; CHECK-NEXT:    fsrm a0
348; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
349; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
350; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
351; CHECK-NEXT:    vmv1r.v v0, v7
352; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
353; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24, v0.t
354; CHECK-NEXT:    csrr a0, vlenb
355; CHECK-NEXT:    slli a0, a0, 3
356; CHECK-NEXT:    add sp, sp, a0
357; CHECK-NEXT:    .cfi_def_cfa sp, 16
358; CHECK-NEXT:    addi sp, sp, 16
359; CHECK-NEXT:    .cfi_def_cfa_offset 0
360; CHECK-NEXT:    ret
361  %v = call <vscale x 32 x bfloat> @llvm.vp.ceil.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
362  ret <vscale x 32 x bfloat> %v
363}
364
365define <vscale x 32 x bfloat> @vp_ceil_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
366; CHECK-LABEL: vp_ceil_vv_nxv32bf16_unmasked:
367; CHECK:       # %bb.0:
368; CHECK-NEXT:    addi sp, sp, -16
369; CHECK-NEXT:    .cfi_def_cfa_offset 16
370; CHECK-NEXT:    csrr a1, vlenb
371; CHECK-NEXT:    slli a1, a1, 3
372; CHECK-NEXT:    sub sp, sp, a1
373; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
374; CHECK-NEXT:    csrr a2, vlenb
375; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
376; CHECK-NEXT:    vmset.m v16
377; CHECK-NEXT:    lui a3, 307200
378; CHECK-NEXT:    slli a1, a2, 1
379; CHECK-NEXT:    srli a2, a2, 2
380; CHECK-NEXT:    fmv.w.x fa5, a3
381; CHECK-NEXT:    sub a3, a0, a1
382; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
383; CHECK-NEXT:    vslidedown.vx v16, v16, a2
384; CHECK-NEXT:    sltu a2, a0, a3
385; CHECK-NEXT:    vmv1r.v v17, v16
386; CHECK-NEXT:    addi a2, a2, -1
387; CHECK-NEXT:    and a2, a2, a3
388; CHECK-NEXT:    vmv1r.v v0, v16
389; CHECK-NEXT:    addi a3, sp, 16
390; CHECK-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
391; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
392; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
393; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
394; CHECK-NEXT:    vfabs.v v8, v24, v0.t
395; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
396; CHECK-NEXT:    vmflt.vf v17, v8, fa5, v0.t
397; CHECK-NEXT:    fsrmi a2, 3
398; CHECK-NEXT:    vmv1r.v v0, v17
399; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
400; CHECK-NEXT:    vfcvt.x.f.v v8, v24, v0.t
401; CHECK-NEXT:    fsrm a2
402; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
403; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
404; CHECK-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
405; CHECK-NEXT:    vmv1r.v v0, v16
406; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
407; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
408; CHECK-NEXT:    bltu a0, a1, .LBB11_2
409; CHECK-NEXT:  # %bb.1:
410; CHECK-NEXT:    mv a0, a1
411; CHECK-NEXT:  .LBB11_2:
412; CHECK-NEXT:    addi a1, sp, 16
413; CHECK-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
414; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
415; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
416; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
417; CHECK-NEXT:    vfabs.v v24, v16
418; CHECK-NEXT:    vmflt.vf v0, v24, fa5
419; CHECK-NEXT:    fsrmi a0, 3
420; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
421; CHECK-NEXT:    fsrm a0
422; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
423; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
424; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
425; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
426; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
427; CHECK-NEXT:    csrr a0, vlenb
428; CHECK-NEXT:    slli a0, a0, 3
429; CHECK-NEXT:    add sp, sp, a0
430; CHECK-NEXT:    .cfi_def_cfa sp, 16
431; CHECK-NEXT:    addi sp, sp, 16
432; CHECK-NEXT:    .cfi_def_cfa_offset 0
433; CHECK-NEXT:    ret
434  %v = call <vscale x 32 x bfloat> @llvm.vp.ceil.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
435  ret <vscale x 32 x bfloat> %v
436}
declare <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)

439define <vscale x 1 x half> @vp_ceil_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
440; ZVFH-LABEL: vp_ceil_vv_nxv1f16:
441; ZVFH:       # %bb.0:
442; ZVFH-NEXT:    lui a1, %hi(.LCPI12_0)
443; ZVFH-NEXT:    flh fa5, %lo(.LCPI12_0)(a1)
444; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
445; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
446; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
447; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
448; ZVFH-NEXT:    fsrmi a0, 3
449; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
450; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
451; ZVFH-NEXT:    fsrm a0
452; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
453; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
454; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
455; ZVFH-NEXT:    ret
456;
457; ZVFHMIN-LABEL: vp_ceil_vv_nxv1f16:
458; ZVFHMIN:       # %bb.0:
459; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
460; ZVFHMIN-NEXT:    vmv1r.v v9, v0
461; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
462; ZVFHMIN-NEXT:    lui a0, 307200
463; ZVFHMIN-NEXT:    vmv1r.v v8, v0
464; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
465; ZVFHMIN-NEXT:    vfabs.v v11, v10, v0.t
466; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
467; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
468; ZVFHMIN-NEXT:    vmflt.vf v8, v11, fa5, v0.t
469; ZVFHMIN-NEXT:    fsrmi a0, 3
470; ZVFHMIN-NEXT:    vmv1r.v v0, v8
471; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
472; ZVFHMIN-NEXT:    vfcvt.x.f.v v11, v10, v0.t
473; ZVFHMIN-NEXT:    fsrm a0
474; ZVFHMIN-NEXT:    vfcvt.f.x.v v11, v11, v0.t
475; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
476; ZVFHMIN-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
477; ZVFHMIN-NEXT:    vmv1r.v v0, v9
478; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
479; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
480; ZVFHMIN-NEXT:    ret
481  %v = call <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
482  ret <vscale x 1 x half> %v
483}
484
485define <vscale x 1 x half> @vp_ceil_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
486; ZVFH-LABEL: vp_ceil_vv_nxv1f16_unmasked:
487; ZVFH:       # %bb.0:
488; ZVFH-NEXT:    lui a1, %hi(.LCPI13_0)
489; ZVFH-NEXT:    flh fa5, %lo(.LCPI13_0)(a1)
490; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
491; ZVFH-NEXT:    vfabs.v v9, v8
492; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
493; ZVFH-NEXT:    fsrmi a0, 3
494; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
495; ZVFH-NEXT:    fsrm a0
496; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
497; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
498; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
499; ZVFH-NEXT:    ret
500;
501; ZVFHMIN-LABEL: vp_ceil_vv_nxv1f16_unmasked:
502; ZVFHMIN:       # %bb.0:
503; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
504; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
505; ZVFHMIN-NEXT:    lui a0, 307200
506; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
507; ZVFHMIN-NEXT:    vfabs.v v8, v9
508; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
509; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
510; ZVFHMIN-NEXT:    fsrmi a0, 3
511; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
512; ZVFHMIN-NEXT:    fsrm a0
513; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
514; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
515; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
516; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
517; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
518; ZVFHMIN-NEXT:    ret
519  %v = call <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
520  ret <vscale x 1 x half> %v
521}

declare <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)

525define <vscale x 2 x half> @vp_ceil_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
526; ZVFH-LABEL: vp_ceil_vv_nxv2f16:
527; ZVFH:       # %bb.0:
528; ZVFH-NEXT:    lui a1, %hi(.LCPI14_0)
529; ZVFH-NEXT:    flh fa5, %lo(.LCPI14_0)(a1)
530; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
531; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
532; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
533; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
534; ZVFH-NEXT:    fsrmi a0, 3
535; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
536; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
537; ZVFH-NEXT:    fsrm a0
538; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
539; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
540; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
541; ZVFH-NEXT:    ret
542;
543; ZVFHMIN-LABEL: vp_ceil_vv_nxv2f16:
544; ZVFHMIN:       # %bb.0:
545; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
546; ZVFHMIN-NEXT:    vmv1r.v v9, v0
547; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
548; ZVFHMIN-NEXT:    lui a0, 307200
549; ZVFHMIN-NEXT:    vmv1r.v v8, v0
550; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
551; ZVFHMIN-NEXT:    vfabs.v v11, v10, v0.t
552; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
553; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
554; ZVFHMIN-NEXT:    vmflt.vf v8, v11, fa5, v0.t
555; ZVFHMIN-NEXT:    fsrmi a0, 3
556; ZVFHMIN-NEXT:    vmv.v.v v0, v8
557; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
558; ZVFHMIN-NEXT:    vfcvt.x.f.v v11, v10, v0.t
559; ZVFHMIN-NEXT:    fsrm a0
560; ZVFHMIN-NEXT:    vfcvt.f.x.v v11, v11, v0.t
561; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
562; ZVFHMIN-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
563; ZVFHMIN-NEXT:    vmv1r.v v0, v9
564; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
565; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
566; ZVFHMIN-NEXT:    ret
567  %v = call <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
568  ret <vscale x 2 x half> %v
569}
570
571define <vscale x 2 x half> @vp_ceil_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
572; ZVFH-LABEL: vp_ceil_vv_nxv2f16_unmasked:
573; ZVFH:       # %bb.0:
574; ZVFH-NEXT:    lui a1, %hi(.LCPI15_0)
575; ZVFH-NEXT:    flh fa5, %lo(.LCPI15_0)(a1)
576; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
577; ZVFH-NEXT:    vfabs.v v9, v8
578; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
579; ZVFH-NEXT:    fsrmi a0, 3
580; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
581; ZVFH-NEXT:    fsrm a0
582; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
583; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
584; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
585; ZVFH-NEXT:    ret
586;
587; ZVFHMIN-LABEL: vp_ceil_vv_nxv2f16_unmasked:
588; ZVFHMIN:       # %bb.0:
589; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
590; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
591; ZVFHMIN-NEXT:    lui a0, 307200
592; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
593; ZVFHMIN-NEXT:    vfabs.v v8, v9
594; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
595; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
596; ZVFHMIN-NEXT:    fsrmi a0, 3
597; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
598; ZVFHMIN-NEXT:    fsrm a0
599; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
600; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
601; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
602; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
603; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
604; ZVFHMIN-NEXT:    ret
605  %v = call <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
606  ret <vscale x 2 x half> %v
607}

declare <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)

611define <vscale x 4 x half> @vp_ceil_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
612; ZVFH-LABEL: vp_ceil_vv_nxv4f16:
613; ZVFH:       # %bb.0:
614; ZVFH-NEXT:    lui a1, %hi(.LCPI16_0)
615; ZVFH-NEXT:    flh fa5, %lo(.LCPI16_0)(a1)
616; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
617; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
618; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
619; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
620; ZVFH-NEXT:    fsrmi a0, 3
621; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
622; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
623; ZVFH-NEXT:    fsrm a0
624; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
625; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
626; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
627; ZVFH-NEXT:    ret
628;
629; ZVFHMIN-LABEL: vp_ceil_vv_nxv4f16:
630; ZVFHMIN:       # %bb.0:
631; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
632; ZVFHMIN-NEXT:    vmv1r.v v9, v0
633; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
634; ZVFHMIN-NEXT:    lui a0, 307200
635; ZVFHMIN-NEXT:    vmv1r.v v8, v0
636; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
637; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
638; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
639; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
640; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
641; ZVFHMIN-NEXT:    fsrmi a0, 3
642; ZVFHMIN-NEXT:    vmv1r.v v0, v8
643; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
644; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
645; ZVFHMIN-NEXT:    fsrm a0
646; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
647; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
648; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
649; ZVFHMIN-NEXT:    vmv1r.v v0, v9
650; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
651; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
652; ZVFHMIN-NEXT:    ret
653  %v = call <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
654  ret <vscale x 4 x half> %v
655}
656
657define <vscale x 4 x half> @vp_ceil_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
658; ZVFH-LABEL: vp_ceil_vv_nxv4f16_unmasked:
659; ZVFH:       # %bb.0:
660; ZVFH-NEXT:    lui a1, %hi(.LCPI17_0)
661; ZVFH-NEXT:    flh fa5, %lo(.LCPI17_0)(a1)
662; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
663; ZVFH-NEXT:    vfabs.v v9, v8
664; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
665; ZVFH-NEXT:    fsrmi a0, 3
666; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
667; ZVFH-NEXT:    fsrm a0
668; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
669; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
670; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
671; ZVFH-NEXT:    ret
672;
673; ZVFHMIN-LABEL: vp_ceil_vv_nxv4f16_unmasked:
674; ZVFHMIN:       # %bb.0:
675; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
676; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
677; ZVFHMIN-NEXT:    lui a0, 307200
678; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
679; ZVFHMIN-NEXT:    vfabs.v v8, v10
680; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
681; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
682; ZVFHMIN-NEXT:    fsrmi a0, 3
683; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
684; ZVFHMIN-NEXT:    fsrm a0
685; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
686; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
687; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
688; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
689; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
690; ZVFHMIN-NEXT:    ret
691  %v = call <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
692  ret <vscale x 4 x half> %v
693}

declare <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)

; Masked vp.ceil of <vscale x 8 x half>: |x| is compared against a constant-pool
; threshold to pick elements needing rounding, the convert-to-int-and-back runs
; with rounding mode 3 (round-up), and vfsgnj restores the sign. The zvfhmin
; path widens to f32 (m4) first and narrows back under the original mask.
define <vscale x 8 x half> @vp_ceil_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vmv1r.v v10, v0
; ZVFH-NEXT:    lui a0, %hi(.LCPI18_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a0)
; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_ceil_vv_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v10, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}
744
; Unmasked vp.ceil of <vscale x 8 x half> (mask = splat i1 true): the compare
; writes v0 directly, so no mask-register shuffling is needed in either path.
define <vscale x 8 x half> @vp_ceil_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv8f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI19_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI19_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfabs.v v10, v8
; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_ceil_vv_nxv8f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v12
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x half> %v
}
782
783declare <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)
784
; Masked vp.ceil of <vscale x 16 x half> at m4; the zvfhmin widening to f32
; runs at e32/m8 (the largest group), using rounding mode 3 (round-up).
define <vscale x 16 x half> @vp_ceil_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vmv1r.v v12, v0
; ZVFH-NEXT:    lui a0, %hi(.LCPI20_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI20_0)(a0)
; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vmv1r.v v0, v12
; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_ceil_vv_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v12, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v24, fa5, v0.t
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x half> %v
}
832
; Unmasked vp.ceil of <vscale x 16 x half>: all-true mask lets vmflt write v0
; directly, avoiding the mask save/restore copies of the masked variant.
define <vscale x 16 x half> @vp_ceil_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv16f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI21_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI21_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfabs.v v12, v8
; ZVFH-NEXT:    vmflt.vf v0, v12, fa5
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_ceil_vv_nxv16f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v16
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x half> %v
}
870
871declare <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half>, <vscale x 32 x i1>, i32)
872
; Masked vp.ceil of <vscale x 32 x half>: an m8 source cannot be widened to f32
; in one step, so the zvfhmin path splits the EVL into two m4 halves (high half
; first, with its sub-mask produced by vslidedown), spills v8 to an 8*vlenb
; stack slot between the halves, and rounds each half at e32/m8.
define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT:    vmv1r.v v16, v0
; ZVFH-NEXT:    lui a0, %hi(.LCPI22_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI22_0)(a0)
; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vmv1r.v v0, v16
; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_ceil_vv_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 3
; ZVFHMIN-NEXT:    sub sp, sp, a1
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v7, v0
; ZVFHMIN-NEXT:    csrr a2, vlenb
; ZVFHMIN-NEXT:    lui a3, 307200
; ZVFHMIN-NEXT:    slli a1, a2, 1
; ZVFHMIN-NEXT:    srli a2, a2, 2
; ZVFHMIN-NEXT:    fmv.w.x fa5, a3
; ZVFHMIN-NEXT:    sub a3, a0, a1
; ZVFHMIN-NEXT:    vslidedown.vx v17, v0, a2
; ZVFHMIN-NEXT:    sltu a2, a0, a3
; ZVFHMIN-NEXT:    vmv1r.v v18, v17
; ZVFHMIN-NEXT:    addi a2, a2, -1
; ZVFHMIN-NEXT:    and a2, a2, a3
; ZVFHMIN-NEXT:    vmv1r.v v0, v17
; ZVFHMIN-NEXT:    addi a3, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v18, v8, fa5, v0.t
; ZVFHMIN-NEXT:    fsrmi a2, 3
; ZVFHMIN-NEXT:    vmv1r.v v0, v18
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
; ZVFHMIN-NEXT:    fsrm a2
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v17
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
; ZVFHMIN-NEXT:    bltu a0, a1, .LBB22_2
; ZVFHMIN-NEXT:  # %bb.1:
; ZVFHMIN-NEXT:    mv a0, a1
; ZVFHMIN-NEXT:  .LBB22_2:
; ZVFHMIN-NEXT:    vmv1r.v v0, v7
; ZVFHMIN-NEXT:    addi a1, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v7
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v16, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v24, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v7
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24, v0.t
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x half> %v
}
969
; Unmasked vp.ceil of <vscale x 32 x half>: same two-half split as the masked
; form, but the sub-mask for the high half comes from vmset.m + vslidedown and
; the low (second) half is processed without a governing mask at all.
define <vscale x 32 x half> @vp_ceil_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv32f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI23_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI23_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT:    vfabs.v v16, v8
; ZVFH-NEXT:    vmflt.vf v0, v16, fa5
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_ceil_vv_nxv32f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 3
; ZVFHMIN-NEXT:    sub sp, sp, a1
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFHMIN-NEXT:    csrr a2, vlenb
; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
; ZVFHMIN-NEXT:    vmset.m v16
; ZVFHMIN-NEXT:    lui a3, 307200
; ZVFHMIN-NEXT:    slli a1, a2, 1
; ZVFHMIN-NEXT:    srli a2, a2, 2
; ZVFHMIN-NEXT:    fmv.w.x fa5, a3
; ZVFHMIN-NEXT:    sub a3, a0, a1
; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT:    vslidedown.vx v16, v16, a2
; ZVFHMIN-NEXT:    sltu a2, a0, a3
; ZVFHMIN-NEXT:    vmv1r.v v17, v16
; ZVFHMIN-NEXT:    addi a2, a2, -1
; ZVFHMIN-NEXT:    and a2, a2, a3
; ZVFHMIN-NEXT:    vmv1r.v v0, v16
; ZVFHMIN-NEXT:    addi a3, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
; ZVFHMIN-NEXT:    fsrmi a2, 3
; ZVFHMIN-NEXT:    vmv1r.v v0, v17
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
; ZVFHMIN-NEXT:    fsrm a2
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
; ZVFHMIN-NEXT:    bltu a0, a1, .LBB23_2
; ZVFHMIN-NEXT:  # %bb.1:
; ZVFHMIN-NEXT:    mv a0, a1
; ZVFHMIN-NEXT:  .LBB23_2:
; ZVFHMIN-NEXT:    addi a1, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v24, v16
; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x half> %v
}
1057
1058declare <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
1059
; Masked vp.ceil of <vscale x 1 x float>: f32 threshold is materialized inline
; (307200 << 12 = 0x4b000000 = 2^23) instead of a constant-pool load.
define <vscale x 1 x float> @vp_ceil_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}
1080
; Unmasked vp.ceil of <vscale x 1 x float>; the compare writes v0 directly.
define <vscale x 1 x float> @vp_ceil_vv_nxv1f32_unmasked(<vscale x 1 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv1f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x float> %v
}
1099
1100declare <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
1101
; Masked vp.ceil of <vscale x 2 x float> at m1; mf2 pattern scaled up.
define <vscale x 2 x float> @vp_ceil_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}
1122
; Unmasked vp.ceil of <vscale x 2 x float> at m1.
define <vscale x 2 x float> @vp_ceil_vv_nxv2f32_unmasked(<vscale x 2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x float> %v
}
1141
1142declare <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
1143
; Masked vp.ceil of <vscale x 4 x float> at m2: from m2 upward the incoming
; mask must be copied (v10) because vmflt overwrites the mask operand.
define <vscale x 4 x float> @vp_ceil_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v0
; CHECK-NEXT:    vfabs.v v12, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}
1166
; Unmasked vp.ceil of <vscale x 4 x float> at m2.
define <vscale x 4 x float> @vp_ceil_vv_nxv4f32_unmasked(<vscale x 4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v10, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x float> %v
}
1185
1186declare <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
1187
; Masked vp.ceil of <vscale x 8 x float> at m4; mask copied into v12.
define <vscale x 8 x float> @vp_ceil_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vmv1r.v v12, v0
; CHECK-NEXT:    vfabs.v v16, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}
1210
; Unmasked vp.ceil of <vscale x 8 x float> at m4.
define <vscale x 8 x float> @vp_ceil_vv_nxv8f32_unmasked(<vscale x 8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v12, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x float> %v
}
1229
1230declare <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
1231
; Masked vp.ceil of <vscale x 16 x float> at m8; mask copied into v16.
define <vscale x 16 x float> @vp_ceil_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    vfabs.v v24, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x float> %v
}
1254
; Unmasked vp.ceil of <vscale x 16 x float> at m8.
define <vscale x 16 x float> @vp_ceil_vv_nxv16f32_unmasked(<vscale x 16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x float> %v
}
1273
1274declare <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
1275
; Masked vp.ceil of <vscale x 1 x double>: f64 threshold is too wide for a
; lui/fmv.w.x pair, so it is loaded from the constant pool (.LCPI34_0).
define <vscale x 1 x double> @vp_ceil_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI34_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI34_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}
1296
; Unmasked vp.ceil of <vscale x 1 x double> at m1.
define <vscale x 1 x double> @vp_ceil_vv_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv1f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI35_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI35_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x double> %v
}
1315
1316declare <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
1317
; Masked vp.ceil of <vscale x 2 x double> at m2; mask copied into v10.
define <vscale x 2 x double> @vp_ceil_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v0
; CHECK-NEXT:    lui a0, %hi(.LCPI36_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
; CHECK-NEXT:    vfabs.v v12, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}
1340
; Unmasked vp.ceil of <vscale x 2 x double> at m2.
define <vscale x 2 x double> @vp_ceil_vv_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI37_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI37_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x double> %v
}
1359
1360declare <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
1361
; Masked vp.ceil of <vscale x 4 x double> at m4; mask copied into v12.
define <vscale x 4 x double> @vp_ceil_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vmv1r.v v12, v0
; CHECK-NEXT:    lui a0, %hi(.LCPI38_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
; CHECK-NEXT:    vfabs.v v16, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}
1384
; Unmasked vp.ceil of <vscale x 4 x double> at m4.
define <vscale x 4 x double> @vp_ceil_vv_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI39_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI39_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x double> %v
}
1403
1404declare <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)
1405
; Masked vp.ceil of the non-power-of-two count <vscale x 7 x double>; it is
; lowered like the m8 case (same instruction sequence, mask copied into v16).
define <vscale x 7 x double> @vp_ceil_vv_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv7f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    lui a0, %hi(.LCPI40_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
; CHECK-NEXT:    vfabs.v v24, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
  ret <vscale x 7 x double> %v
}
1428
; Unmasked variant of the nxv7f64 case: the lane mask comes solely from the
; |va| < threshold compare (no VP mask to merge), so no mask copies are
; needed; otherwise the same fsrmi-3 (RUP) convert/convert-back/vfsgnj
; sequence at LMUL=8.
define <vscale x 7 x double> @vp_ceil_vv_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv7f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI41_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI41_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 7 x double> %v
}
1447
1448declare <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
1449
; Masked f64 ceil at the full LMUL=8 register group. Identical expected code
; shape to the nxv7f64 masked case (save v0, masked compare to refine the
; mask, fsrmi 3 / fsrm around the int round trip, masked vfsgnj.vv merge);
; only the constant-pool label differs.
define <vscale x 8 x double> @vp_ceil_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    lui a0, %hi(.LCPI42_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
; CHECK-NEXT:    vfabs.v v24, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}
1472
; Unmasked f64 ceil at LMUL=8: threshold compare writes v0 directly, then
; the standard fsrmi-3 (RUP) convert/convert-back plus masked vfsgnj.vv
; sign restore.
define <vscale x 8 x double> @vp_ceil_vv_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI43_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI43_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x double> %v
}
1491
; Test operation splitting: nxv16f64 is wider than the largest legal RVV type,
; so the lowering must split it into two LMUL=8 halves, dividing the EVL (and
; mask, where present) between them.
1493declare <vscale x 16 x double> @llvm.vp.ceil.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)
1494
; Masked f64 ceil on a type twice the largest legal size: the expected code
; processes the high half (v16, mask upper bits obtained via vslidedown) with
; EVL = max(evl - vlenb, 0) computed branchlessly through the sltu/addi/and
; sequence, then the low half (v8) with EVL = min(evl, vlenb). One 8*vlenb
; stack slot (see the cfi_escape expressing sp + 16 + 8*vlenb) spills the
; high-half result across the second half's computation because all m8
; register groups are live.
define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmv1r.v v7, v0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    lui a2, %hi(.LCPI44_0)
; CHECK-NEXT:    srli a3, a1, 3
; CHECK-NEXT:    fld fa5, %lo(.LCPI44_0)(a2)
; CHECK-NEXT:    sub a2, a0, a1
; CHECK-NEXT:    vslidedown.vx v6, v0, a3
; CHECK-NEXT:    sltu a3, a0, a2
; CHECK-NEXT:    addi a3, a3, -1
; CHECK-NEXT:    and a2, a3, a2
; CHECK-NEXT:    vmv1r.v v0, v6
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
; CHECK-NEXT:    fsrmi a2, 3
; CHECK-NEXT:    vmv1r.v v0, v6
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    addi a3, sp, 16
; CHECK-NEXT:    vs8r.v v24, (a3) # Unknown-size Folded Spill
; CHECK-NEXT:    fsrm a2
; CHECK-NEXT:    addi a2, sp, 16
; CHECK-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT:    bltu a0, a1, .LBB44_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB44_2:
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v7, v16, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x double> @llvm.vp.ceil.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}
1562
; Unmasked split case: same two-half structure as the masked nxv16f64 test
; (high half in v16 with the clamped remainder EVL, low half in v8 with
; EVL capped at vlenb via the bltu branch), but with no VP mask there is no
; mask slide or cross-half register pressure, so no stack spills are
; expected.
define <vscale x 16 x double> @vp_ceil_vv_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    lui a2, %hi(.LCPI45_0)
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    fld fa5, %lo(.LCPI45_0)(a2)
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16
; CHECK-NEXT:    vmflt.vf v0, v24, fa5
; CHECK-NEXT:    fsrmi a2, 3
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    fsrm a2
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB45_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB45_2:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v8
; CHECK-NEXT:    vmflt.vf v0, v24, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x double> @llvm.vp.ceil.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x double> %v
}
1599