xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll (revision 9122c5235ec85ce0c0ad337e862b006e7b349d84)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
3; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
4; RUN:     --check-prefixes=CHECK,ZVFH
5; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
6; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
7; RUN:     --check-prefixes=CHECK,ZVFH
8; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
9; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
10; RUN:     --check-prefixes=CHECK,ZVFHMIN
11; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
12; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
13; RUN:     --check-prefixes=CHECK,ZVFHMIN
14
; ceil of <vscale x 1 x bfloat>. Every RUN line includes the CHECK prefix, so
; one expectation covers all configurations. Expected code: widen to f32
; (vfwcvtbf16.f.f.v, e16/mf4 -> e32/mf2), set mask v0 for lanes where |x| is
; below the f32 bit pattern 0x4B000000 built by `lui 307200`, convert those
; lanes to integer and back with frm temporarily set to 3 (fsrmi/fsrm), put the
; input's sign back with the masked vfsgnj.vv, then narrow to bf16
; (vfncvtbf16.f.f.w).
15define <vscale x 1 x bfloat> @ceil_nxv1bf16(<vscale x 1 x bfloat> %x) {
16; CHECK-LABEL: ceil_nxv1bf16:
17; CHECK:       # %bb.0:
18; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
19; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
20; CHECK-NEXT:    lui a0, 307200
21; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
22; CHECK-NEXT:    vfabs.v v8, v9
23; CHECK-NEXT:    fmv.w.x fa5, a0
24; CHECK-NEXT:    vmflt.vf v0, v8, fa5
25; CHECK-NEXT:    fsrmi a0, 3
26; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
27; CHECK-NEXT:    fsrm a0
28; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
29; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
30; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
31; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
32; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
33; CHECK-NEXT:    ret
34  %a = call <vscale x 1 x bfloat> @llvm.ceil.nxv1bf16(<vscale x 1 x bfloat> %x)
35  ret <vscale x 1 x bfloat> %a
36}
37
; ceil of <vscale x 2 x bfloat>. Every RUN line includes the CHECK prefix.
; Expected code: widen to f32 (vfwcvtbf16.f.f.v, e16/mf2 -> e32/m1), mask the
; lanes where |x| is below the constant built by `lui 307200`, round-trip those
; lanes through integer with frm temporarily set to 3, put the input's sign
; back with the masked vfsgnj.vv, then narrow to bf16 (vfncvtbf16.f.f.w).
38define <vscale x 2 x bfloat> @ceil_nxv2bf16(<vscale x 2 x bfloat> %x) {
39; CHECK-LABEL: ceil_nxv2bf16:
40; CHECK:       # %bb.0:
41; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
42; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
43; CHECK-NEXT:    lui a0, 307200
44; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
45; CHECK-NEXT:    vfabs.v v8, v9
46; CHECK-NEXT:    fmv.w.x fa5, a0
47; CHECK-NEXT:    vmflt.vf v0, v8, fa5
48; CHECK-NEXT:    fsrmi a0, 3
49; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
50; CHECK-NEXT:    fsrm a0
51; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
52; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
53; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
54; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
55; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
56; CHECK-NEXT:    ret
57  %a = call <vscale x 2 x bfloat> @llvm.ceil.nxv2bf16(<vscale x 2 x bfloat> %x)
58  ret <vscale x 2 x bfloat> %a
59}
60
; ceil of <vscale x 4 x bfloat>. Every RUN line includes the CHECK prefix.
; Expected code: widen to f32 (vfwcvtbf16.f.f.v, e16/m1 -> e32/m2), mask the
; lanes where |x| is below the constant built by `lui 307200`, round-trip those
; lanes through integer with frm temporarily set to 3, put the input's sign
; back with the masked vfsgnj.vv, then narrow to bf16 (vfncvtbf16.f.f.w).
61define <vscale x 4 x bfloat> @ceil_nxv4bf16(<vscale x 4 x bfloat> %x) {
62; CHECK-LABEL: ceil_nxv4bf16:
63; CHECK:       # %bb.0:
64; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
65; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
66; CHECK-NEXT:    lui a0, 307200
67; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
68; CHECK-NEXT:    vfabs.v v8, v10
69; CHECK-NEXT:    fmv.w.x fa5, a0
70; CHECK-NEXT:    vmflt.vf v0, v8, fa5
71; CHECK-NEXT:    fsrmi a0, 3
72; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
73; CHECK-NEXT:    fsrm a0
74; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
75; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
76; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
77; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
78; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
79; CHECK-NEXT:    ret
80  %a = call <vscale x 4 x bfloat> @llvm.ceil.nxv4bf16(<vscale x 4 x bfloat> %x)
81  ret <vscale x 4 x bfloat> %a
82}
83
; ceil of <vscale x 8 x bfloat>. Every RUN line includes the CHECK prefix.
; Expected code: widen to f32 (vfwcvtbf16.f.f.v, e16/m2 -> e32/m4), mask the
; lanes where |x| is below the constant built by `lui 307200`, round-trip those
; lanes through integer with frm temporarily set to 3, put the input's sign
; back with the masked vfsgnj.vv, then narrow to bf16 (vfncvtbf16.f.f.w).
84define <vscale x 8 x bfloat> @ceil_nxv8bf16(<vscale x 8 x bfloat> %x) {
85; CHECK-LABEL: ceil_nxv8bf16:
86; CHECK:       # %bb.0:
87; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
88; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
89; CHECK-NEXT:    lui a0, 307200
90; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
91; CHECK-NEXT:    vfabs.v v8, v12
92; CHECK-NEXT:    fmv.w.x fa5, a0
93; CHECK-NEXT:    vmflt.vf v0, v8, fa5
94; CHECK-NEXT:    fsrmi a0, 3
95; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
96; CHECK-NEXT:    fsrm a0
97; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
98; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
99; CHECK-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
100; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
101; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
102; CHECK-NEXT:    ret
103  %a = call <vscale x 8 x bfloat> @llvm.ceil.nxv8bf16(<vscale x 8 x bfloat> %x)
104  ret <vscale x 8 x bfloat> %a
105}
106
; ceil of <vscale x 16 x bfloat>. Every RUN line includes the CHECK prefix.
; Expected code: widen to f32 (vfwcvtbf16.f.f.v, e16/m4 -> e32/m8), mask the
; lanes where |x| is below the constant built by `lui 307200`, round-trip those
; lanes through integer with frm temporarily set to 3, put the input's sign
; back with the masked vfsgnj.vv, then narrow to bf16 (vfncvtbf16.f.f.w).
107define <vscale x 16 x bfloat> @ceil_nxv16bf16(<vscale x 16 x bfloat> %x) {
108; CHECK-LABEL: ceil_nxv16bf16:
109; CHECK:       # %bb.0:
110; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
111; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
112; CHECK-NEXT:    lui a0, 307200
113; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
114; CHECK-NEXT:    vfabs.v v8, v16
115; CHECK-NEXT:    fmv.w.x fa5, a0
116; CHECK-NEXT:    vmflt.vf v0, v8, fa5
117; CHECK-NEXT:    fsrmi a0, 3
118; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
119; CHECK-NEXT:    fsrm a0
120; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
121; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
122; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
123; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
124; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
125; CHECK-NEXT:    ret
126  %a = call <vscale x 16 x bfloat> @llvm.ceil.nxv16bf16(<vscale x 16 x bfloat> %x)
127  ret <vscale x 16 x bfloat> %a
128}
129
; ceil of <vscale x 32 x bfloat>. Every RUN line includes the CHECK prefix.
; The expected code processes the input as two e16/m4 halves (v8, then v12).
; Each half is widened to f32 at e32/m8 (vfwcvtbf16.f.f.v), rounded with the
; masked integer round-trip under frm = 3 (fsrmi/fsrm) plus a masked vfsgnj.vv,
; and narrowed back (vfncvtbf16.f.f.w) into v8 and v12 respectively. The
; threshold in fa5 (from `lui 307200`) is materialised once and reused for the
; second half; the narrowing of the first half is interleaved with the second
; half's mask computation.
130define <vscale x 32 x bfloat> @ceil_nxv32bf16(<vscale x 32 x bfloat> %x) {
131; CHECK-LABEL: ceil_nxv32bf16:
132; CHECK:       # %bb.0:
133; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
134; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
135; CHECK-NEXT:    lui a0, 307200
136; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
137; CHECK-NEXT:    vfabs.v v24, v16
138; CHECK-NEXT:    fmv.w.x fa5, a0
139; CHECK-NEXT:    vmflt.vf v0, v24, fa5
140; CHECK-NEXT:    fsrmi a0, 3
141; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
142; CHECK-NEXT:    fsrm a0
143; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
144; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
145; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
146; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
147; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
148; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
149; CHECK-NEXT:    vfabs.v v8, v24
150; CHECK-NEXT:    vmflt.vf v0, v8, fa5
151; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
152; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
153; CHECK-NEXT:    fsrmi a0, 3
154; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
155; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
156; CHECK-NEXT:    fsrm a0
157; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
158; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
159; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
160; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
161; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24
162; CHECK-NEXT:    ret
163  %a = call <vscale x 32 x bfloat> @llvm.ceil.nxv32bf16(<vscale x 32 x bfloat> %x)
164  ret <vscale x 32 x bfloat> %a
165}
166
; ceil of <vscale x 1 x half>; expectations differ per prefix.
; ZVFH: rounds directly at e16/mf4; the compare threshold is loaded with flh
; from constant-pool entry .LCPI6_0.
; ZVFHMIN: widens to f32 (vfwcvt.f.f.v, e32/mf2), compares against the constant
; built by `lui 307200`, and narrows back with vfncvt.f.f.w.
; Both variants mask lanes with |x| below the threshold, round-trip them through
; integer with frm temporarily set to 3 (fsrmi/fsrm), and put the input's sign
; back with a masked vfsgnj.vv.
167define <vscale x 1 x half> @ceil_nxv1f16(<vscale x 1 x half> %x) {
168; ZVFH-LABEL: ceil_nxv1f16:
169; ZVFH:       # %bb.0:
170; ZVFH-NEXT:    lui a0, %hi(.LCPI6_0)
171; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a0)
172; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
173; ZVFH-NEXT:    vfabs.v v9, v8
174; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
175; ZVFH-NEXT:    fsrmi a0, 3
176; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
177; ZVFH-NEXT:    fsrm a0
178; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
179; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
180; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
181; ZVFH-NEXT:    ret
182;
183; ZVFHMIN-LABEL: ceil_nxv1f16:
184; ZVFHMIN:       # %bb.0:
185; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
186; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
187; ZVFHMIN-NEXT:    lui a0, 307200
188; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
189; ZVFHMIN-NEXT:    vfabs.v v8, v9
190; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
191; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
192; ZVFHMIN-NEXT:    fsrmi a0, 3
193; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
194; ZVFHMIN-NEXT:    fsrm a0
195; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
196; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
197; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
198; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
199; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
200; ZVFHMIN-NEXT:    ret
201  %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
202  ret <vscale x 1 x half> %a
203}
204declare <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half>)
205
; ceil of <vscale x 2 x half>; expectations differ per prefix.
; ZVFH: rounds directly at e16/mf2; the compare threshold is loaded with flh
; from constant-pool entry .LCPI7_0.
; ZVFHMIN: widens to f32 (vfwcvt.f.f.v, e32/m1), compares against the constant
; built by `lui 307200`, and narrows back with vfncvt.f.f.w.
; Both variants mask lanes with |x| below the threshold, round-trip them through
; integer with frm temporarily set to 3 (fsrmi/fsrm), and put the input's sign
; back with a masked vfsgnj.vv.
206define <vscale x 2 x half> @ceil_nxv2f16(<vscale x 2 x half> %x) {
207; ZVFH-LABEL: ceil_nxv2f16:
208; ZVFH:       # %bb.0:
209; ZVFH-NEXT:    lui a0, %hi(.LCPI7_0)
210; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a0)
211; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
212; ZVFH-NEXT:    vfabs.v v9, v8
213; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
214; ZVFH-NEXT:    fsrmi a0, 3
215; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
216; ZVFH-NEXT:    fsrm a0
217; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
218; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
219; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
220; ZVFH-NEXT:    ret
221;
222; ZVFHMIN-LABEL: ceil_nxv2f16:
223; ZVFHMIN:       # %bb.0:
224; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
225; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
226; ZVFHMIN-NEXT:    lui a0, 307200
227; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
228; ZVFHMIN-NEXT:    vfabs.v v8, v9
229; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
230; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
231; ZVFHMIN-NEXT:    fsrmi a0, 3
232; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
233; ZVFHMIN-NEXT:    fsrm a0
234; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
235; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
236; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
237; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
238; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
239; ZVFHMIN-NEXT:    ret
240  %a = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %x)
241  ret <vscale x 2 x half> %a
242}
243declare <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half>)
244
; ceil of <vscale x 4 x half>; expectations differ per prefix.
; ZVFH: rounds directly at e16/m1; the compare threshold is loaded with flh
; from constant-pool entry .LCPI8_0.
; ZVFHMIN: widens to f32 (vfwcvt.f.f.v, e32/m2), compares against the constant
; built by `lui 307200`, and narrows back with vfncvt.f.f.w.
; Both variants mask lanes with |x| below the threshold, round-trip them through
; integer with frm temporarily set to 3 (fsrmi/fsrm), and put the input's sign
; back with a masked vfsgnj.vv.
245define <vscale x 4 x half> @ceil_nxv4f16(<vscale x 4 x half> %x) {
246; ZVFH-LABEL: ceil_nxv4f16:
247; ZVFH:       # %bb.0:
248; ZVFH-NEXT:    lui a0, %hi(.LCPI8_0)
249; ZVFH-NEXT:    flh fa5, %lo(.LCPI8_0)(a0)
250; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
251; ZVFH-NEXT:    vfabs.v v9, v8
252; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
253; ZVFH-NEXT:    fsrmi a0, 3
254; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
255; ZVFH-NEXT:    fsrm a0
256; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
257; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
258; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
259; ZVFH-NEXT:    ret
260;
261; ZVFHMIN-LABEL: ceil_nxv4f16:
262; ZVFHMIN:       # %bb.0:
263; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
264; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
265; ZVFHMIN-NEXT:    lui a0, 307200
266; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
267; ZVFHMIN-NEXT:    vfabs.v v8, v10
268; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
269; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
270; ZVFHMIN-NEXT:    fsrmi a0, 3
271; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
272; ZVFHMIN-NEXT:    fsrm a0
273; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
274; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
275; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
276; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
277; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
278; ZVFHMIN-NEXT:    ret
279  %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
280  ret <vscale x 4 x half> %a
281}
282declare <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half>)
283
; ceil of <vscale x 8 x half>; expectations differ per prefix.
; ZVFH: rounds directly at e16/m2; the compare threshold is loaded with flh
; from constant-pool entry .LCPI9_0.
; ZVFHMIN: widens to f32 (vfwcvt.f.f.v, e32/m4), compares against the constant
; built by `lui 307200`, and narrows back with vfncvt.f.f.w.
; Both variants mask lanes with |x| below the threshold, round-trip them through
; integer with frm temporarily set to 3 (fsrmi/fsrm), and put the input's sign
; back with a masked vfsgnj.vv.
284define <vscale x 8 x half> @ceil_nxv8f16(<vscale x 8 x half> %x) {
285; ZVFH-LABEL: ceil_nxv8f16:
286; ZVFH:       # %bb.0:
287; ZVFH-NEXT:    lui a0, %hi(.LCPI9_0)
288; ZVFH-NEXT:    flh fa5, %lo(.LCPI9_0)(a0)
289; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
290; ZVFH-NEXT:    vfabs.v v10, v8
291; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
292; ZVFH-NEXT:    fsrmi a0, 3
293; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
294; ZVFH-NEXT:    fsrm a0
295; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
296; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
297; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
298; ZVFH-NEXT:    ret
299;
300; ZVFHMIN-LABEL: ceil_nxv8f16:
301; ZVFHMIN:       # %bb.0:
302; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
303; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
304; ZVFHMIN-NEXT:    lui a0, 307200
305; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
306; ZVFHMIN-NEXT:    vfabs.v v8, v12
307; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
308; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
309; ZVFHMIN-NEXT:    fsrmi a0, 3
310; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
311; ZVFHMIN-NEXT:    fsrm a0
312; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
313; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
314; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
315; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
316; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
317; ZVFHMIN-NEXT:    ret
318  %a = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> %x)
319  ret <vscale x 8 x half> %a
320}
321declare <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half>)
322
; ceil of <vscale x 16 x half>; expectations differ per prefix.
; ZVFH: rounds directly at e16/m4; the compare threshold is loaded with flh
; from constant-pool entry .LCPI10_0.
; ZVFHMIN: widens to f32 (vfwcvt.f.f.v, e32/m8), compares against the constant
; built by `lui 307200`, and narrows back with vfncvt.f.f.w.
; Both variants mask lanes with |x| below the threshold, round-trip them through
; integer with frm temporarily set to 3 (fsrmi/fsrm), and put the input's sign
; back with a masked vfsgnj.vv.
323define <vscale x 16 x half> @ceil_nxv16f16(<vscale x 16 x half> %x) {
324; ZVFH-LABEL: ceil_nxv16f16:
325; ZVFH:       # %bb.0:
326; ZVFH-NEXT:    lui a0, %hi(.LCPI10_0)
327; ZVFH-NEXT:    flh fa5, %lo(.LCPI10_0)(a0)
328; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
329; ZVFH-NEXT:    vfabs.v v12, v8
330; ZVFH-NEXT:    vmflt.vf v0, v12, fa5
331; ZVFH-NEXT:    fsrmi a0, 3
332; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
333; ZVFH-NEXT:    fsrm a0
334; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
335; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
336; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
337; ZVFH-NEXT:    ret
338;
339; ZVFHMIN-LABEL: ceil_nxv16f16:
340; ZVFHMIN:       # %bb.0:
341; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
342; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
343; ZVFHMIN-NEXT:    lui a0, 307200
344; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
345; ZVFHMIN-NEXT:    vfabs.v v8, v16
346; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
347; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
348; ZVFHMIN-NEXT:    fsrmi a0, 3
349; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
350; ZVFHMIN-NEXT:    fsrm a0
351; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
352; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
353; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
354; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
355; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
356; ZVFHMIN-NEXT:    ret
357  %a = call <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half> %x)
358  ret <vscale x 16 x half> %a
359}
360declare <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half>)
361
; ceil of <vscale x 32 x half>; expectations differ per prefix.
; ZVFH: a single pass at e16/m8; the compare threshold is loaded with flh from
; constant-pool entry .LCPI11_0.
; ZVFHMIN: the input is processed as two e16/m4 halves (v8, then v12). Each
; half is widened to f32 at e32/m8 (vfwcvt.f.f.v), rounded, and narrowed back
; (vfncvt.f.f.w) into v8 and v12 respectively. The threshold in fa5 (from
; `lui 307200`) is materialised once and reused for the second half.
; Both variants mask lanes with |x| below the threshold, round-trip them through
; integer with frm temporarily set to 3 (fsrmi/fsrm), and put the input's sign
; back with a masked vfsgnj.vv.
362define <vscale x 32 x half> @ceil_nxv32f16(<vscale x 32 x half> %x) {
363; ZVFH-LABEL: ceil_nxv32f16:
364; ZVFH:       # %bb.0:
365; ZVFH-NEXT:    lui a0, %hi(.LCPI11_0)
366; ZVFH-NEXT:    flh fa5, %lo(.LCPI11_0)(a0)
367; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
368; ZVFH-NEXT:    vfabs.v v16, v8
369; ZVFH-NEXT:    vmflt.vf v0, v16, fa5
370; ZVFH-NEXT:    fsrmi a0, 3
371; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
372; ZVFH-NEXT:    fsrm a0
373; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
374; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
375; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
376; ZVFH-NEXT:    ret
377;
378; ZVFHMIN-LABEL: ceil_nxv32f16:
379; ZVFHMIN:       # %bb.0:
380; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
381; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
382; ZVFHMIN-NEXT:    lui a0, 307200
383; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
384; ZVFHMIN-NEXT:    vfabs.v v24, v16
385; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
386; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
387; ZVFHMIN-NEXT:    fsrmi a0, 3
388; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
389; ZVFHMIN-NEXT:    fsrm a0
390; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
391; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
392; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
393; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
394; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
395; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
396; ZVFHMIN-NEXT:    vfabs.v v8, v24
397; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
398; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
399; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
400; ZVFHMIN-NEXT:    fsrmi a0, 3
401; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
402; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v24, v0.t
403; ZVFHMIN-NEXT:    fsrm a0
404; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
405; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
406; ZVFHMIN-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
407; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
408; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
409; ZVFHMIN-NEXT:    ret
410  %a = call <vscale x 32 x half> @llvm.ceil.nxv32f16(<vscale x 32 x half> %x)
411  ret <vscale x 32 x half> %a
412}
413declare <vscale x 32 x half> @llvm.ceil.nxv32f16(<vscale x 32 x half>)
414
; ceil of <vscale x 1 x float>. Every RUN line includes the CHECK prefix.
; Expected code works at e32/mf2: mask v0 marks lanes where |x| is below the
; f32 constant built by `lui 307200` + fmv.w.x; those lanes are converted to
; integer and back with frm temporarily set to 3 (fsrmi/fsrm), and a masked
; vfsgnj.vv puts the input's sign on the result.
415define <vscale x 1 x float> @ceil_nxv1f32(<vscale x 1 x float> %x) {
416; CHECK-LABEL: ceil_nxv1f32:
417; CHECK:       # %bb.0:
418; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
419; CHECK-NEXT:    vfabs.v v9, v8
420; CHECK-NEXT:    lui a0, 307200
421; CHECK-NEXT:    fmv.w.x fa5, a0
422; CHECK-NEXT:    vmflt.vf v0, v9, fa5
423; CHECK-NEXT:    fsrmi a0, 3
424; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
425; CHECK-NEXT:    fsrm a0
426; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
427; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
428; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
429; CHECK-NEXT:    ret
430  %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
431  ret <vscale x 1 x float> %a
432}
433declare <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float>)
434
; ceil of <vscale x 2 x float>. Every RUN line includes the CHECK prefix.
; Expected code works at e32/m1: mask v0 marks lanes where |x| is below the
; f32 constant built by `lui 307200` + fmv.w.x; those lanes are converted to
; integer and back with frm temporarily set to 3 (fsrmi/fsrm), and a masked
; vfsgnj.vv puts the input's sign on the result.
435define <vscale x 2 x float> @ceil_nxv2f32(<vscale x 2 x float> %x) {
436; CHECK-LABEL: ceil_nxv2f32:
437; CHECK:       # %bb.0:
438; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
439; CHECK-NEXT:    vfabs.v v9, v8
440; CHECK-NEXT:    lui a0, 307200
441; CHECK-NEXT:    fmv.w.x fa5, a0
442; CHECK-NEXT:    vmflt.vf v0, v9, fa5
443; CHECK-NEXT:    fsrmi a0, 3
444; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
445; CHECK-NEXT:    fsrm a0
446; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
447; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
448; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
449; CHECK-NEXT:    ret
450  %a = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> %x)
451  ret <vscale x 2 x float> %a
452}
453declare <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float>)
454
; ceil of <vscale x 4 x float>. Every RUN line includes the CHECK prefix.
; Expected code works at e32/m2: mask v0 marks lanes where |x| is below the
; f32 constant built by `lui 307200` + fmv.w.x; those lanes are converted to
; integer and back with frm temporarily set to 3 (fsrmi/fsrm), and a masked
; vfsgnj.vv puts the input's sign on the result.
455define <vscale x 4 x float> @ceil_nxv4f32(<vscale x 4 x float> %x) {
456; CHECK-LABEL: ceil_nxv4f32:
457; CHECK:       # %bb.0:
458; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
459; CHECK-NEXT:    vfabs.v v10, v8
460; CHECK-NEXT:    lui a0, 307200
461; CHECK-NEXT:    fmv.w.x fa5, a0
462; CHECK-NEXT:    vmflt.vf v0, v10, fa5
463; CHECK-NEXT:    fsrmi a0, 3
464; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
465; CHECK-NEXT:    fsrm a0
466; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
467; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
468; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
469; CHECK-NEXT:    ret
470  %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
471  ret <vscale x 4 x float> %a
472}
473declare <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float>)
474
; ceil of <vscale x 8 x float>. Every RUN line includes the CHECK prefix.
; Expected code works at e32/m4: mask v0 marks lanes where |x| is below the
; f32 constant built by `lui 307200` + fmv.w.x; those lanes are converted to
; integer and back with frm temporarily set to 3 (fsrmi/fsrm), and a masked
; vfsgnj.vv puts the input's sign on the result.
475define <vscale x 8 x float> @ceil_nxv8f32(<vscale x 8 x float> %x) {
476; CHECK-LABEL: ceil_nxv8f32:
477; CHECK:       # %bb.0:
478; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
479; CHECK-NEXT:    vfabs.v v12, v8
480; CHECK-NEXT:    lui a0, 307200
481; CHECK-NEXT:    fmv.w.x fa5, a0
482; CHECK-NEXT:    vmflt.vf v0, v12, fa5
483; CHECK-NEXT:    fsrmi a0, 3
484; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
485; CHECK-NEXT:    fsrm a0
486; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
487; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
488; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
489; CHECK-NEXT:    ret
490  %a = call <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float> %x)
491  ret <vscale x 8 x float> %a
492}
493declare <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float>)
494
; ceil of <vscale x 16 x float>. Every RUN line includes the CHECK prefix.
; Expected code works at e32/m8: mask v0 marks lanes where |x| is below the
; f32 constant built by `lui 307200` + fmv.w.x; those lanes are converted to
; integer and back with frm temporarily set to 3 (fsrmi/fsrm), and a masked
; vfsgnj.vv puts the input's sign on the result.
495define <vscale x 16 x float> @ceil_nxv16f32(<vscale x 16 x float> %x) {
496; CHECK-LABEL: ceil_nxv16f32:
497; CHECK:       # %bb.0:
498; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
499; CHECK-NEXT:    vfabs.v v16, v8
500; CHECK-NEXT:    lui a0, 307200
501; CHECK-NEXT:    fmv.w.x fa5, a0
502; CHECK-NEXT:    vmflt.vf v0, v16, fa5
503; CHECK-NEXT:    fsrmi a0, 3
504; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
505; CHECK-NEXT:    fsrm a0
506; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
507; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
508; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
509; CHECK-NEXT:    ret
510  %a = call <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float> %x)
511  ret <vscale x 16 x float> %a
512}
513declare <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float>)
514
; ceil of <vscale x 1 x double>. Every RUN line includes the CHECK prefix.
; Expected code works at e64/m1: the compare threshold is loaded with fld from
; constant-pool entry .LCPI17_0; mask v0 marks lanes where |x| is below it;
; those lanes are converted to integer and back with frm temporarily set to 3
; (fsrmi/fsrm), and a masked vfsgnj.vv puts the input's sign on the result.
515define <vscale x 1 x double> @ceil_nxv1f64(<vscale x 1 x double> %x) {
516; CHECK-LABEL: ceil_nxv1f64:
517; CHECK:       # %bb.0:
518; CHECK-NEXT:    lui a0, %hi(.LCPI17_0)
519; CHECK-NEXT:    fld fa5, %lo(.LCPI17_0)(a0)
520; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
521; CHECK-NEXT:    vfabs.v v9, v8
522; CHECK-NEXT:    vmflt.vf v0, v9, fa5
523; CHECK-NEXT:    fsrmi a0, 3
524; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
525; CHECK-NEXT:    fsrm a0
526; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
527; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
528; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
529; CHECK-NEXT:    ret
530  %a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
531  ret <vscale x 1 x double> %a
532}
533declare <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double>)
534
; ceil of <vscale x 2 x double>. Every RUN line includes the CHECK prefix.
; Expected code works at e64/m2: the compare threshold is loaded with fld from
; constant-pool entry .LCPI18_0; mask v0 marks lanes where |x| is below it;
; those lanes are converted to integer and back with frm temporarily set to 3
; (fsrmi/fsrm), and a masked vfsgnj.vv puts the input's sign on the result.
535define <vscale x 2 x double> @ceil_nxv2f64(<vscale x 2 x double> %x) {
536; CHECK-LABEL: ceil_nxv2f64:
537; CHECK:       # %bb.0:
538; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
539; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
540; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
541; CHECK-NEXT:    vfabs.v v10, v8
542; CHECK-NEXT:    vmflt.vf v0, v10, fa5
543; CHECK-NEXT:    fsrmi a0, 3
544; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
545; CHECK-NEXT:    fsrm a0
546; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
547; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
548; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
549; CHECK-NEXT:    ret
550  %a = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> %x)
551  ret <vscale x 2 x double> %a
552}
553declare <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double>)
554
; ceil of <vscale x 4 x double>. Every RUN line includes the CHECK prefix.
; Expected code works at e64/m4: the compare threshold is loaded with fld from
; constant-pool entry .LCPI19_0; mask v0 marks lanes where |x| is below it;
; those lanes are converted to integer and back with frm temporarily set to 3
; (fsrmi/fsrm), and a masked vfsgnj.vv puts the input's sign on the result.
555define <vscale x 4 x double> @ceil_nxv4f64(<vscale x 4 x double> %x) {
556; CHECK-LABEL: ceil_nxv4f64:
557; CHECK:       # %bb.0:
558; CHECK-NEXT:    lui a0, %hi(.LCPI19_0)
559; CHECK-NEXT:    fld fa5, %lo(.LCPI19_0)(a0)
560; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
561; CHECK-NEXT:    vfabs.v v12, v8
562; CHECK-NEXT:    vmflt.vf v0, v12, fa5
563; CHECK-NEXT:    fsrmi a0, 3
564; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
565; CHECK-NEXT:    fsrm a0
566; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
567; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
568; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
569; CHECK-NEXT:    ret
570  %a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
571  ret <vscale x 4 x double> %a
572}
573declare <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double>)
574
; ceil of <vscale x 8 x double>. Every RUN line includes the CHECK prefix.
; Expected code works at e64/m8: the compare threshold is loaded with fld from
; constant-pool entry .LCPI20_0; mask v0 marks lanes where |x| is below it;
; those lanes are converted to integer and back with frm temporarily set to 3
; (fsrmi/fsrm), and a masked vfsgnj.vv puts the input's sign on the result.
575define <vscale x 8 x double> @ceil_nxv8f64(<vscale x 8 x double> %x) {
576; CHECK-LABEL: ceil_nxv8f64:
577; CHECK:       # %bb.0:
578; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
579; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
580; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
581; CHECK-NEXT:    vfabs.v v16, v8
582; CHECK-NEXT:    vmflt.vf v0, v16, fa5
583; CHECK-NEXT:    fsrmi a0, 3
584; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
585; CHECK-NEXT:    fsrm a0
586; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
587; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
588; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
589; CHECK-NEXT:    ret
590  %a = call <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double> %x)
591  ret <vscale x 8 x double> %a
592}
593declare <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double>)
594