; xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll (revision db158c7c830807caeeb0691739c41f1d522029e9)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"

;
; CEIL -> FRINTP
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frintp_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintp v0.4h, v0.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.ceil.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frintp_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintp v0.8h, v0.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.ceil.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frintp_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x half>, ptr %a
  %res = call <16 x half> @llvm.ceil.v16f16(<16 x half> %op)
  store <16 x half> %res, ptr %a
  ret void
}

define void @frintp_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintp_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintp z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    frintp z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintp_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintp z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <32 x half>, ptr %a
  %res = call <32 x half> @llvm.ceil.v32f16(<32 x half> %op)
  store <32 x half> %res, ptr %a
  ret void
}

define void @frintp_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintp_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x half>, ptr %a
  %res = call <64 x half> @llvm.ceil.v64f16(<64 x half> %op)
  store <64 x half> %res, ptr %a
  ret void
}

define void @frintp_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintp_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <128 x half>, ptr %a
  %res = call <128 x half> @llvm.ceil.v128f16(<128 x half> %op)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frintp_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintp v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.ceil.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frintp_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintp v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.ceil.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frintp_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <8 x float>, ptr %a
  %res = call <8 x float> @llvm.ceil.v8f32(<8 x float> %op)
  store <8 x float> %res, ptr %a
  ret void
}

define void @frintp_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintp_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintp z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    frintp z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintp_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintp z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <16 x float>, ptr %a
  %res = call <16 x float> @llvm.ceil.v16f32(<16 x float> %op)
  store <16 x float> %res, ptr %a
  ret void
}

define void @frintp_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintp_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x float>, ptr %a
  %res = call <32 x float> @llvm.ceil.v32f32(<32 x float> %op)
  store <32 x float> %res, ptr %a
  ret void
}

define void @frintp_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintp_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x float>, ptr %a
  %res = call <64 x float> @llvm.ceil.v64f32(<64 x float> %op)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frintp_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintp d0, d0
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.ceil.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frintp_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintp v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frintp_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <4 x double>, ptr %a
  %res = call <4 x double> @llvm.ceil.v4f64(<4 x double> %op)
  store <4 x double> %res, ptr %a
  ret void
}

define void @frintp_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintp_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintp z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    frintp z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintp_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintp z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <8 x double>, ptr %a
  %res = call <8 x double> @llvm.ceil.v8f64(<8 x double> %op)
  store <8 x double> %res, ptr %a
  ret void
}

define void @frintp_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintp_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x double>, ptr %a
  %res = call <16 x double> @llvm.ceil.v16f64(<16 x double> %op)
  store <16 x double> %res, ptr %a
  ret void
}

define void @frintp_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintp_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x double>, ptr %a
  %res = call <32 x double> @llvm.ceil.v32f64(<32 x double> %op)
  store <32 x double> %res, ptr %a
  ret void
}

;
; FLOOR -> FRINTM
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frintm_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintm v0.4h, v0.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.floor.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frintm_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintm v0.8h, v0.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.floor.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frintm_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x half>, ptr %a
  %res = call <16 x half> @llvm.floor.v16f16(<16 x half> %op)
  store <16 x half> %res, ptr %a
  ret void
}

define void @frintm_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintm_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintm z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    frintm z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintm_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintm z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <32 x half>, ptr %a
  %res = call <32 x half> @llvm.floor.v32f16(<32 x half> %op)
  store <32 x half> %res, ptr %a
  ret void
}

define void @frintm_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintm_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x half>, ptr %a
  %res = call <64 x half> @llvm.floor.v64f16(<64 x half> %op)
  store <64 x half> %res, ptr %a
  ret void
}

define void @frintm_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintm_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <128 x half>, ptr %a
  %res = call <128 x half> @llvm.floor.v128f16(<128 x half> %op)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frintm_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintm v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.floor.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frintm_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintm v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.floor.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frintm_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <8 x float>, ptr %a
  %res = call <8 x float> @llvm.floor.v8f32(<8 x float> %op)
  store <8 x float> %res, ptr %a
  ret void
}

define void @frintm_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintm_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintm z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    frintm z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintm_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintm z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <16 x float>, ptr %a
  %res = call <16 x float> @llvm.floor.v16f32(<16 x float> %op)
  store <16 x float> %res, ptr %a
  ret void
}

define void @frintm_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintm_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x float>, ptr %a
  %res = call <32 x float> @llvm.floor.v32f32(<32 x float> %op)
  store <32 x float> %res, ptr %a
  ret void
}

define void @frintm_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintm_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x float>, ptr %a
  %res = call <64 x float> @llvm.floor.v64f32(<64 x float> %op)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frintm_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintm d0, d0
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.floor.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frintm_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintm v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.floor.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frintm_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <4 x double>, ptr %a
  %res = call <4 x double> @llvm.floor.v4f64(<4 x double> %op)
  store <4 x double> %res, ptr %a
  ret void
}

define void @frintm_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintm_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintm z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    frintm z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintm_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintm z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <8 x double>, ptr %a
  %res = call <8 x double> @llvm.floor.v8f64(<8 x double> %op)
  store <8 x double> %res, ptr %a
  ret void
}

define void @frintm_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintm_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x double>, ptr %a
  %res = call <16 x double> @llvm.floor.v16f64(<16 x double> %op)
  store <16 x double> %res, ptr %a
  ret void
}

define void @frintm_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintm_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x double>, ptr %a
  %res = call <32 x double> @llvm.floor.v32f64(<32 x double> %op)
  store <32 x double> %res, ptr %a
  ret void
}

;
; FNEARBYINT -> FRINTI
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frinti_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frinti v0.4h, v0.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frinti_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frinti v0.8h, v0.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frinti_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x half>, ptr %a
  %res = call <16 x half> @llvm.nearbyint.v16f16(<16 x half> %op)
  store <16 x half> %res, ptr %a
  ret void
}

define void @frinti_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinti_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frinti z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    frinti z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frinti_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frinti z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <32 x half>, ptr %a
  %res = call <32 x half> @llvm.nearbyint.v32f16(<32 x half> %op)
  store <32 x half> %res, ptr %a
  ret void
}

define void @frinti_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinti_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x half>, ptr %a
  %res = call <64 x half> @llvm.nearbyint.v64f16(<64 x half> %op)
  store <64 x half> %res, ptr %a
  ret void
}

define void @frinti_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinti_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <128 x half>, ptr %a
  %res = call <128 x half> @llvm.nearbyint.v128f16(<128 x half> %op)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frinti_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frinti v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frinti_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frinti v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frinti_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <8 x float>, ptr %a
  %res = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %op)
  store <8 x float> %res, ptr %a
  ret void
}

define void @frinti_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinti_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frinti z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    frinti z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frinti_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frinti z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <16 x float>, ptr %a
  %res = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %op)
  store <16 x float> %res, ptr %a
  ret void
}

define void @frinti_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinti_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x float>, ptr %a
  %res = call <32 x float> @llvm.nearbyint.v32f32(<32 x float> %op)
  store <32 x float> %res, ptr %a
  ret void
}

define void @frinti_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinti_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x float>, ptr %a
  %res = call <64 x float> @llvm.nearbyint.v64f32(<64 x float> %op)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frinti_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frinti d0, d0
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frinti_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frinti v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frinti_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <4 x double>, ptr %a
  %res = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %op)
  store <4 x double> %res, ptr %a
  ret void
}

define void @frinti_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinti_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frinti z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    frinti z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frinti_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frinti z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <8 x double>, ptr %a
  %res = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %op)
  store <8 x double> %res, ptr %a
  ret void
}

define void @frinti_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinti_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x double>, ptr %a
  %res = call <16 x double> @llvm.nearbyint.v16f64(<16 x double> %op)
  store <16 x double> %res, ptr %a
  ret void
}

define void @frinti_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinti_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x double>, ptr %a
  %res = call <32 x double> @llvm.nearbyint.v32f64(<32 x double> %op)
  store <32 x double> %res, ptr %a
  ret void
}

;
; RINT -> FRINTX
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frintx_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintx v0.4h, v0.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.rint.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frintx_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintx v0.8h, v0.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.rint.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frintx_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x half>, ptr %a
  %res = call <16 x half> @llvm.rint.v16f16(<16 x half> %op)
  store <16 x half> %res, ptr %a
  ret void
}

define void @frintx_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintx_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintx z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    frintx z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintx_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintx z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <32 x half>, ptr %a
  %res = call <32 x half> @llvm.rint.v32f16(<32 x half> %op)
  store <32 x half> %res, ptr %a
  ret void
}

define void @frintx_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintx_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x half>, ptr %a
  %res = call <64 x half> @llvm.rint.v64f16(<64 x half> %op)
  store <64 x half> %res, ptr %a
  ret void
}

define void @frintx_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintx_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <128 x half>, ptr %a
  %res = call <128 x half> @llvm.rint.v128f16(<128 x half> %op)
  store <128 x half> %res, ptr %a
  ret void
}

904; Don't use SVE for 64-bit vectors.
905define <2 x float> @frintx_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
906; CHECK-LABEL: frintx_v2f32:
907; CHECK:       // %bb.0:
908; CHECK-NEXT:    frintx v0.2s, v0.2s
909; CHECK-NEXT:    ret
910  %res = call <2 x float> @llvm.rint.v2f32(<2 x float> %op)
911  ret <2 x float> %res
912}
913
914; Don't use SVE for 128-bit vectors.
915define <4 x float> @frintx_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
916; CHECK-LABEL: frintx_v4f32:
917; CHECK:       // %bb.0:
918; CHECK-NEXT:    frintx v0.4s, v0.4s
919; CHECK-NEXT:    ret
920  %res = call <4 x float> @llvm.rint.v4f32(<4 x float> %op)
921  ret <4 x float> %res
922}
923
924define void @frintx_v8f32(ptr %a) vscale_range(2,0) #0 {
925; CHECK-LABEL: frintx_v8f32:
926; CHECK:       // %bb.0:
927; CHECK-NEXT:    ptrue p0.s, vl8
928; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
929; CHECK-NEXT:    frintx z0.s, p0/m, z0.s
930; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
931; CHECK-NEXT:    ret
932  %op = load <8 x float>, ptr %a
933  %res = call <8 x float> @llvm.rint.v8f32(<8 x float> %op)
934  store <8 x float> %res, ptr %a
935  ret void
936}
937
938define void @frintx_v16f32(ptr %a) #0 {
939; VBITS_GE_256-LABEL: frintx_v16f32:
940; VBITS_GE_256:       // %bb.0:
941; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
942; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
943; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
944; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
945; VBITS_GE_256-NEXT:    frintx z0.s, p0/m, z0.s
946; VBITS_GE_256-NEXT:    frintx z1.s, p0/m, z1.s
947; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
948; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
949; VBITS_GE_256-NEXT:    ret
950;
951; VBITS_GE_512-LABEL: frintx_v16f32:
952; VBITS_GE_512:       // %bb.0:
953; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
954; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
955; VBITS_GE_512-NEXT:    frintx z0.s, p0/m, z0.s
956; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
957; VBITS_GE_512-NEXT:    ret
958  %op = load <16 x float>, ptr %a
959  %res = call <16 x float> @llvm.rint.v16f32(<16 x float> %op)
960  store <16 x float> %res, ptr %a
961  ret void
962}
963
964define void @frintx_v32f32(ptr %a) vscale_range(8,0) #0 {
965; CHECK-LABEL: frintx_v32f32:
966; CHECK:       // %bb.0:
967; CHECK-NEXT:    ptrue p0.s, vl32
968; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
969; CHECK-NEXT:    frintx z0.s, p0/m, z0.s
970; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
971; CHECK-NEXT:    ret
972  %op = load <32 x float>, ptr %a
973  %res = call <32 x float> @llvm.rint.v32f32(<32 x float> %op)
974  store <32 x float> %res, ptr %a
975  ret void
976}
977
978define void @frintx_v64f32(ptr %a) vscale_range(16,0) #0 {
979; CHECK-LABEL: frintx_v64f32:
980; CHECK:       // %bb.0:
981; CHECK-NEXT:    ptrue p0.s, vl64
982; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
983; CHECK-NEXT:    frintx z0.s, p0/m, z0.s
984; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
985; CHECK-NEXT:    ret
986  %op = load <64 x float>, ptr %a
987  %res = call <64 x float> @llvm.rint.v64f32(<64 x float> %op)
988  store <64 x float> %res, ptr %a
989  ret void
990}
991
992; Don't use SVE for 64-bit vectors.
993define <1 x double> @frintx_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
994; CHECK-LABEL: frintx_v1f64:
995; CHECK:       // %bb.0:
996; CHECK-NEXT:    frintx d0, d0
997; CHECK-NEXT:    ret
998  %res = call <1 x double> @llvm.rint.v1f64(<1 x double> %op)
999  ret <1 x double> %res
1000}
1001
1002; Don't use SVE for 128-bit vectors.
1003define <2 x double> @frintx_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
1004; CHECK-LABEL: frintx_v2f64:
1005; CHECK:       // %bb.0:
1006; CHECK-NEXT:    frintx v0.2d, v0.2d
1007; CHECK-NEXT:    ret
1008  %res = call <2 x double> @llvm.rint.v2f64(<2 x double> %op)
1009  ret <2 x double> %res
1010}
1011
1012define void @frintx_v4f64(ptr %a) vscale_range(2,0) #0 {
1013; CHECK-LABEL: frintx_v4f64:
1014; CHECK:       // %bb.0:
1015; CHECK-NEXT:    ptrue p0.d, vl4
1016; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1017; CHECK-NEXT:    frintx z0.d, p0/m, z0.d
1018; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
1019; CHECK-NEXT:    ret
1020  %op = load <4 x double>, ptr %a
1021  %res = call <4 x double> @llvm.rint.v4f64(<4 x double> %op)
1022  store <4 x double> %res, ptr %a
1023  ret void
1024}
1025
1026define void @frintx_v8f64(ptr %a) #0 {
1027; VBITS_GE_256-LABEL: frintx_v8f64:
1028; VBITS_GE_256:       // %bb.0:
1029; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
1030; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
1031; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
1032; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
1033; VBITS_GE_256-NEXT:    frintx z0.d, p0/m, z0.d
1034; VBITS_GE_256-NEXT:    frintx z1.d, p0/m, z1.d
1035; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
1036; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
1037; VBITS_GE_256-NEXT:    ret
1038;
1039; VBITS_GE_512-LABEL: frintx_v8f64:
1040; VBITS_GE_512:       // %bb.0:
1041; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
1042; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
1043; VBITS_GE_512-NEXT:    frintx z0.d, p0/m, z0.d
1044; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
1045; VBITS_GE_512-NEXT:    ret
1046  %op = load <8 x double>, ptr %a
1047  %res = call <8 x double> @llvm.rint.v8f64(<8 x double> %op)
1048  store <8 x double> %res, ptr %a
1049  ret void
1050}
1051
1052define void @frintx_v16f64(ptr %a) vscale_range(8,0) #0 {
1053; CHECK-LABEL: frintx_v16f64:
1054; CHECK:       // %bb.0:
1055; CHECK-NEXT:    ptrue p0.d, vl16
1056; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1057; CHECK-NEXT:    frintx z0.d, p0/m, z0.d
1058; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
1059; CHECK-NEXT:    ret
1060  %op = load <16 x double>, ptr %a
1061  %res = call <16 x double> @llvm.rint.v16f64(<16 x double> %op)
1062  store <16 x double> %res, ptr %a
1063  ret void
1064}
1065
1066define void @frintx_v32f64(ptr %a) vscale_range(16,0) #0 {
1067; CHECK-LABEL: frintx_v32f64:
1068; CHECK:       // %bb.0:
1069; CHECK-NEXT:    ptrue p0.d, vl32
1070; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1071; CHECK-NEXT:    frintx z0.d, p0/m, z0.d
1072; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
1073; CHECK-NEXT:    ret
1074  %op = load <32 x double>, ptr %a
1075  %res = call <32 x double> @llvm.rint.v32f64(<32 x double> %op)
1076  store <32 x double> %res, ptr %a
1077  ret void
1078}
1079
1080;
1081; ROUND -> FRINTA
; llvm.round rounds each element to the nearest integer, with half-way
; cases rounded away from zero -- matching FRINTA ("round to nearest
; with ties to away").  CHECK lines are autogenerated by
; utils/update_llc_test_checks.py; regenerate them, do not hand-edit.
; Same NEON / predicated-SVE / VBITS_GE_256-split structure as the
; sections above.
1082;
1083
1084; Don't use SVE for 64-bit vectors.
1085define <4 x half> @frinta_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
1086; CHECK-LABEL: frinta_v4f16:
1087; CHECK:       // %bb.0:
1088; CHECK-NEXT:    frinta v0.4h, v0.4h
1089; CHECK-NEXT:    ret
1090  %res = call <4 x half> @llvm.round.v4f16(<4 x half> %op)
1091  ret <4 x half> %res
1092}
1093
1094; Don't use SVE for 128-bit vectors.
1095define <8 x half> @frinta_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
1096; CHECK-LABEL: frinta_v8f16:
1097; CHECK:       // %bb.0:
1098; CHECK-NEXT:    frinta v0.8h, v0.8h
1099; CHECK-NEXT:    ret
1100  %res = call <8 x half> @llvm.round.v8f16(<8 x half> %op)
1101  ret <8 x half> %res
1102}
1103
1104define void @frinta_v16f16(ptr %a) vscale_range(2,0) #0 {
1105; CHECK-LABEL: frinta_v16f16:
1106; CHECK:       // %bb.0:
1107; CHECK-NEXT:    ptrue p0.h, vl16
1108; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
1109; CHECK-NEXT:    frinta z0.h, p0/m, z0.h
1110; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
1111; CHECK-NEXT:    ret
1112  %op = load <16 x half>, ptr %a
1113  %res = call <16 x half> @llvm.round.v16f16(<16 x half> %op)
1114  store <16 x half> %res, ptr %a
1115  ret void
1116}
1117
1118define void @frinta_v32f16(ptr %a) #0 {
1119; VBITS_GE_256-LABEL: frinta_v32f16:
1120; VBITS_GE_256:       // %bb.0:
1121; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
1122; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
1123; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
1124; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
1125; VBITS_GE_256-NEXT:    frinta z0.h, p0/m, z0.h
1126; VBITS_GE_256-NEXT:    frinta z1.h, p0/m, z1.h
1127; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
1128; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
1129; VBITS_GE_256-NEXT:    ret
1130;
1131; VBITS_GE_512-LABEL: frinta_v32f16:
1132; VBITS_GE_512:       // %bb.0:
1133; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
1134; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
1135; VBITS_GE_512-NEXT:    frinta z0.h, p0/m, z0.h
1136; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
1137; VBITS_GE_512-NEXT:    ret
1138  %op = load <32 x half>, ptr %a
1139  %res = call <32 x half> @llvm.round.v32f16(<32 x half> %op)
1140  store <32 x half> %res, ptr %a
1141  ret void
1142}
1143
1144define void @frinta_v64f16(ptr %a) vscale_range(8,0) #0 {
1145; CHECK-LABEL: frinta_v64f16:
1146; CHECK:       // %bb.0:
1147; CHECK-NEXT:    ptrue p0.h, vl64
1148; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
1149; CHECK-NEXT:    frinta z0.h, p0/m, z0.h
1150; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
1151; CHECK-NEXT:    ret
1152  %op = load <64 x half>, ptr %a
1153  %res = call <64 x half> @llvm.round.v64f16(<64 x half> %op)
1154  store <64 x half> %res, ptr %a
1155  ret void
1156}
1157
1158define void @frinta_v128f16(ptr %a) vscale_range(16,0) #0 {
1159; CHECK-LABEL: frinta_v128f16:
1160; CHECK:       // %bb.0:
1161; CHECK-NEXT:    ptrue p0.h, vl128
1162; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
1163; CHECK-NEXT:    frinta z0.h, p0/m, z0.h
1164; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
1165; CHECK-NEXT:    ret
1166  %op = load <128 x half>, ptr %a
1167  %res = call <128 x half> @llvm.round.v128f16(<128 x half> %op)
1168  store <128 x half> %res, ptr %a
1169  ret void
1170}
1171
1172; Don't use SVE for 64-bit vectors.
1173define <2 x float> @frinta_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
1174; CHECK-LABEL: frinta_v2f32:
1175; CHECK:       // %bb.0:
1176; CHECK-NEXT:    frinta v0.2s, v0.2s
1177; CHECK-NEXT:    ret
1178  %res = call <2 x float> @llvm.round.v2f32(<2 x float> %op)
1179  ret <2 x float> %res
1180}
1181
1182; Don't use SVE for 128-bit vectors.
1183define <4 x float> @frinta_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
1184; CHECK-LABEL: frinta_v4f32:
1185; CHECK:       // %bb.0:
1186; CHECK-NEXT:    frinta v0.4s, v0.4s
1187; CHECK-NEXT:    ret
1188  %res = call <4 x float> @llvm.round.v4f32(<4 x float> %op)
1189  ret <4 x float> %res
1190}
1191
1192define void @frinta_v8f32(ptr %a) vscale_range(2,0) #0 {
1193; CHECK-LABEL: frinta_v8f32:
1194; CHECK:       // %bb.0:
1195; CHECK-NEXT:    ptrue p0.s, vl8
1196; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
1197; CHECK-NEXT:    frinta z0.s, p0/m, z0.s
1198; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
1199; CHECK-NEXT:    ret
1200  %op = load <8 x float>, ptr %a
1201  %res = call <8 x float> @llvm.round.v8f32(<8 x float> %op)
1202  store <8 x float> %res, ptr %a
1203  ret void
1204}
1205
1206define void @frinta_v16f32(ptr %a) #0 {
1207; VBITS_GE_256-LABEL: frinta_v16f32:
1208; VBITS_GE_256:       // %bb.0:
1209; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
1210; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
1211; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
1212; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
1213; VBITS_GE_256-NEXT:    frinta z0.s, p0/m, z0.s
1214; VBITS_GE_256-NEXT:    frinta z1.s, p0/m, z1.s
1215; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
1216; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
1217; VBITS_GE_256-NEXT:    ret
1218;
1219; VBITS_GE_512-LABEL: frinta_v16f32:
1220; VBITS_GE_512:       // %bb.0:
1221; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
1222; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
1223; VBITS_GE_512-NEXT:    frinta z0.s, p0/m, z0.s
1224; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
1225; VBITS_GE_512-NEXT:    ret
1226  %op = load <16 x float>, ptr %a
1227  %res = call <16 x float> @llvm.round.v16f32(<16 x float> %op)
1228  store <16 x float> %res, ptr %a
1229  ret void
1230}
1231
1232define void @frinta_v32f32(ptr %a) vscale_range(8,0) #0 {
1233; CHECK-LABEL: frinta_v32f32:
1234; CHECK:       // %bb.0:
1235; CHECK-NEXT:    ptrue p0.s, vl32
1236; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
1237; CHECK-NEXT:    frinta z0.s, p0/m, z0.s
1238; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
1239; CHECK-NEXT:    ret
1240  %op = load <32 x float>, ptr %a
1241  %res = call <32 x float> @llvm.round.v32f32(<32 x float> %op)
1242  store <32 x float> %res, ptr %a
1243  ret void
1244}
1245
1246define void @frinta_v64f32(ptr %a) vscale_range(16,0) #0 {
1247; CHECK-LABEL: frinta_v64f32:
1248; CHECK:       // %bb.0:
1249; CHECK-NEXT:    ptrue p0.s, vl64
1250; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
1251; CHECK-NEXT:    frinta z0.s, p0/m, z0.s
1252; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
1253; CHECK-NEXT:    ret
1254  %op = load <64 x float>, ptr %a
1255  %res = call <64 x float> @llvm.round.v64f32(<64 x float> %op)
1256  store <64 x float> %res, ptr %a
1257  ret void
1258}
1259
1260; Don't use SVE for 64-bit vectors.
1261define <1 x double> @frinta_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
1262; CHECK-LABEL: frinta_v1f64:
1263; CHECK:       // %bb.0:
1264; CHECK-NEXT:    frinta d0, d0
1265; CHECK-NEXT:    ret
1266  %res = call <1 x double> @llvm.round.v1f64(<1 x double> %op)
1267  ret <1 x double> %res
1268}
1269
1270; Don't use SVE for 128-bit vectors.
1271define <2 x double> @frinta_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
1272; CHECK-LABEL: frinta_v2f64:
1273; CHECK:       // %bb.0:
1274; CHECK-NEXT:    frinta v0.2d, v0.2d
1275; CHECK-NEXT:    ret
1276  %res = call <2 x double> @llvm.round.v2f64(<2 x double> %op)
1277  ret <2 x double> %res
1278}
1279
1280define void @frinta_v4f64(ptr %a) vscale_range(2,0) #0 {
1281; CHECK-LABEL: frinta_v4f64:
1282; CHECK:       // %bb.0:
1283; CHECK-NEXT:    ptrue p0.d, vl4
1284; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1285; CHECK-NEXT:    frinta z0.d, p0/m, z0.d
1286; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
1287; CHECK-NEXT:    ret
1288  %op = load <4 x double>, ptr %a
1289  %res = call <4 x double> @llvm.round.v4f64(<4 x double> %op)
1290  store <4 x double> %res, ptr %a
1291  ret void
1292}
1293
1294define void @frinta_v8f64(ptr %a) #0 {
1295; VBITS_GE_256-LABEL: frinta_v8f64:
1296; VBITS_GE_256:       // %bb.0:
1297; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
1298; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
1299; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
1300; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
1301; VBITS_GE_256-NEXT:    frinta z0.d, p0/m, z0.d
1302; VBITS_GE_256-NEXT:    frinta z1.d, p0/m, z1.d
1303; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
1304; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
1305; VBITS_GE_256-NEXT:    ret
1306;
1307; VBITS_GE_512-LABEL: frinta_v8f64:
1308; VBITS_GE_512:       // %bb.0:
1309; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
1310; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
1311; VBITS_GE_512-NEXT:    frinta z0.d, p0/m, z0.d
1312; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
1313; VBITS_GE_512-NEXT:    ret
1314  %op = load <8 x double>, ptr %a
1315  %res = call <8 x double> @llvm.round.v8f64(<8 x double> %op)
1316  store <8 x double> %res, ptr %a
1317  ret void
1318}
1319
1320define void @frinta_v16f64(ptr %a) vscale_range(8,0) #0 {
1321; CHECK-LABEL: frinta_v16f64:
1322; CHECK:       // %bb.0:
1323; CHECK-NEXT:    ptrue p0.d, vl16
1324; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1325; CHECK-NEXT:    frinta z0.d, p0/m, z0.d
1326; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
1327; CHECK-NEXT:    ret
1328  %op = load <16 x double>, ptr %a
1329  %res = call <16 x double> @llvm.round.v16f64(<16 x double> %op)
1330  store <16 x double> %res, ptr %a
1331  ret void
1332}
1333
1334define void @frinta_v32f64(ptr %a) vscale_range(16,0) #0 {
1335; CHECK-LABEL: frinta_v32f64:
1336; CHECK:       // %bb.0:
1337; CHECK-NEXT:    ptrue p0.d, vl32
1338; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1339; CHECK-NEXT:    frinta z0.d, p0/m, z0.d
1340; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
1341; CHECK-NEXT:    ret
1342  %op = load <32 x double>, ptr %a
1343  %res = call <32 x double> @llvm.round.v32f64(<32 x double> %op)
1344  store <32 x double> %res, ptr %a
1345  ret void
1346}
1347
1348;
1349; ROUNDEVEN -> FRINTN
; llvm.roundeven rounds each element to the nearest integer, with
; half-way cases rounded to the nearest even value -- matching FRINTN
; ("round to nearest, ties to even").  CHECK lines are autogenerated by
; utils/update_llc_test_checks.py; regenerate them, do not hand-edit.
1350;
1351
1352; Don't use SVE for 64-bit vectors.
1353define <4 x half> @frintn_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
1354; CHECK-LABEL: frintn_v4f16:
1355; CHECK:       // %bb.0:
1356; CHECK-NEXT:    frintn v0.4h, v0.4h
1357; CHECK-NEXT:    ret
1358  %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
1359  ret <4 x half> %res
1360}
1361
1362; Don't use SVE for 128-bit vectors.
1363define <8 x half> @frintn_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
1364; CHECK-LABEL: frintn_v8f16:
1365; CHECK:       // %bb.0:
1366; CHECK-NEXT:    frintn v0.8h, v0.8h
1367; CHECK-NEXT:    ret
1368  %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
1369  ret <8 x half> %res
1370}
1371
1372define void @frintn_v16f16(ptr %a) vscale_range(2,0) #0 {
1373; CHECK-LABEL: frintn_v16f16:
1374; CHECK:       // %bb.0:
1375; CHECK-NEXT:    ptrue p0.h, vl16
1376; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
1377; CHECK-NEXT:    frintn z0.h, p0/m, z0.h
1378; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
1379; CHECK-NEXT:    ret
1380  %op = load <16 x half>, ptr %a
1381  %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
1382  store <16 x half> %res, ptr %a
1383  ret void
1384}
1385
1386define void @frintn_v32f16(ptr %a) #0 {
1387; VBITS_GE_256-LABEL: frintn_v32f16:
1388; VBITS_GE_256:       // %bb.0:
1389; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
1390; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
1391; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
1392; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
1393; VBITS_GE_256-NEXT:    frintn z0.h, p0/m, z0.h
1394; VBITS_GE_256-NEXT:    frintn z1.h, p0/m, z1.h
1395; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
1396; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
1397; VBITS_GE_256-NEXT:    ret
1398;
1399; VBITS_GE_512-LABEL: frintn_v32f16:
1400; VBITS_GE_512:       // %bb.0:
1401; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
1402; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
1403; VBITS_GE_512-NEXT:    frintn z0.h, p0/m, z0.h
1404; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
1405; VBITS_GE_512-NEXT:    ret
1406  %op = load <32 x half>, ptr %a
1407  %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
1408  store <32 x half> %res, ptr %a
1409  ret void
1410}
1411
1412define void @frintn_v64f16(ptr %a) vscale_range(8,0) #0 {
1413; CHECK-LABEL: frintn_v64f16:
1414; CHECK:       // %bb.0:
1415; CHECK-NEXT:    ptrue p0.h, vl64
1416; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
1417; CHECK-NEXT:    frintn z0.h, p0/m, z0.h
1418; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
1419; CHECK-NEXT:    ret
1420  %op = load <64 x half>, ptr %a
1421  %res = call <64 x half> @llvm.roundeven.v64f16(<64 x half> %op)
1422  store <64 x half> %res, ptr %a
1423  ret void
1424}
1425
1426define void @frintn_v128f16(ptr %a) vscale_range(16,0) #0 {
1427; CHECK-LABEL: frintn_v128f16:
1428; CHECK:       // %bb.0:
1429; CHECK-NEXT:    ptrue p0.h, vl128
1430; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
1431; CHECK-NEXT:    frintn z0.h, p0/m, z0.h
1432; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
1433; CHECK-NEXT:    ret
1434  %op = load <128 x half>, ptr %a
1435  %res = call <128 x half> @llvm.roundeven.v128f16(<128 x half> %op)
1436  store <128 x half> %res, ptr %a
1437  ret void
1438}
1439
1440; Don't use SVE for 64-bit vectors.
1441define <2 x float> @frintn_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
1442; CHECK-LABEL: frintn_v2f32:
1443; CHECK:       // %bb.0:
1444; CHECK-NEXT:    frintn v0.2s, v0.2s
1445; CHECK-NEXT:    ret
1446  %res = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %op)
1447  ret <2 x float> %res
1448}
1449
1450; Don't use SVE for 128-bit vectors.
1451define <4 x float> @frintn_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
1452; CHECK-LABEL: frintn_v4f32:
1453; CHECK:       // %bb.0:
1454; CHECK-NEXT:    frintn v0.4s, v0.4s
1455; CHECK-NEXT:    ret
1456  %res = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %op)
1457  ret <4 x float> %res
1458}
1459
1460define void @frintn_v8f32(ptr %a) vscale_range(2,0) #0 {
1461; CHECK-LABEL: frintn_v8f32:
1462; CHECK:       // %bb.0:
1463; CHECK-NEXT:    ptrue p0.s, vl8
1464; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
1465; CHECK-NEXT:    frintn z0.s, p0/m, z0.s
1466; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
1467; CHECK-NEXT:    ret
1468  %op = load <8 x float>, ptr %a
1469  %res = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %op)
1470  store <8 x float> %res, ptr %a
1471  ret void
1472}
1473
1474define void @frintn_v16f32(ptr %a) #0 {
1475; VBITS_GE_256-LABEL: frintn_v16f32:
1476; VBITS_GE_256:       // %bb.0:
1477; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
1478; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
1479; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
1480; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
1481; VBITS_GE_256-NEXT:    frintn z0.s, p0/m, z0.s
1482; VBITS_GE_256-NEXT:    frintn z1.s, p0/m, z1.s
1483; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
1484; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
1485; VBITS_GE_256-NEXT:    ret
1486;
1487; VBITS_GE_512-LABEL: frintn_v16f32:
1488; VBITS_GE_512:       // %bb.0:
1489; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
1490; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
1491; VBITS_GE_512-NEXT:    frintn z0.s, p0/m, z0.s
1492; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
1493; VBITS_GE_512-NEXT:    ret
1494  %op = load <16 x float>, ptr %a
1495  %res = call <16 x float> @llvm.roundeven.v16f32(<16 x float> %op)
1496  store <16 x float> %res, ptr %a
1497  ret void
1498}
1499
1500define void @frintn_v32f32(ptr %a) vscale_range(8,0) #0 {
1501; CHECK-LABEL: frintn_v32f32:
1502; CHECK:       // %bb.0:
1503; CHECK-NEXT:    ptrue p0.s, vl32
1504; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
1505; CHECK-NEXT:    frintn z0.s, p0/m, z0.s
1506; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
1507; CHECK-NEXT:    ret
1508  %op = load <32 x float>, ptr %a
1509  %res = call <32 x float> @llvm.roundeven.v32f32(<32 x float> %op)
1510  store <32 x float> %res, ptr %a
1511  ret void
1512}
1513
1514define void @frintn_v64f32(ptr %a) vscale_range(16,0) #0 {
1515; CHECK-LABEL: frintn_v64f32:
1516; CHECK:       // %bb.0:
1517; CHECK-NEXT:    ptrue p0.s, vl64
1518; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
1519; CHECK-NEXT:    frintn z0.s, p0/m, z0.s
1520; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
1521; CHECK-NEXT:    ret
1522  %op = load <64 x float>, ptr %a
1523  %res = call <64 x float> @llvm.roundeven.v64f32(<64 x float> %op)
1524  store <64 x float> %res, ptr %a
1525  ret void
1526}
1527
1528; Don't use SVE for 64-bit vectors.
1529define <1 x double> @frintn_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
1530; CHECK-LABEL: frintn_v1f64:
1531; CHECK:       // %bb.0:
1532; CHECK-NEXT:    frintn d0, d0
1533; CHECK-NEXT:    ret
1534  %res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op)
1535  ret <1 x double> %res
1536}
1537
1538; Don't use SVE for 128-bit vectors.
1539define <2 x double> @frintn_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
1540; CHECK-LABEL: frintn_v2f64:
1541; CHECK:       // %bb.0:
1542; CHECK-NEXT:    frintn v0.2d, v0.2d
1543; CHECK-NEXT:    ret
1544  %res = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %op)
1545  ret <2 x double> %res
1546}
1547
1548define void @frintn_v4f64(ptr %a) vscale_range(2,0) #0 {
1549; CHECK-LABEL: frintn_v4f64:
1550; CHECK:       // %bb.0:
1551; CHECK-NEXT:    ptrue p0.d, vl4
1552; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1553; CHECK-NEXT:    frintn z0.d, p0/m, z0.d
1554; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
1555; CHECK-NEXT:    ret
1556  %op = load <4 x double>, ptr %a
1557  %res = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %op)
1558  store <4 x double> %res, ptr %a
1559  ret void
1560}
1561
1562define void @frintn_v8f64(ptr %a) #0 {
1563; VBITS_GE_256-LABEL: frintn_v8f64:
1564; VBITS_GE_256:       // %bb.0:
1565; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
1566; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
1567; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
1568; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
1569; VBITS_GE_256-NEXT:    frintn z0.d, p0/m, z0.d
1570; VBITS_GE_256-NEXT:    frintn z1.d, p0/m, z1.d
1571; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
1572; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
1573; VBITS_GE_256-NEXT:    ret
1574;
1575; VBITS_GE_512-LABEL: frintn_v8f64:
1576; VBITS_GE_512:       // %bb.0:
1577; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
1578; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
1579; VBITS_GE_512-NEXT:    frintn z0.d, p0/m, z0.d
1580; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
1581; VBITS_GE_512-NEXT:    ret
1582  %op = load <8 x double>, ptr %a
1583  %res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %op)
1584  store <8 x double> %res, ptr %a
1585  ret void
1586}
1587
1588define void @frintn_v16f64(ptr %a) vscale_range(8,0) #0 {
1589; CHECK-LABEL: frintn_v16f64:
1590; CHECK:       // %bb.0:
1591; CHECK-NEXT:    ptrue p0.d, vl16
1592; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1593; CHECK-NEXT:    frintn z0.d, p0/m, z0.d
1594; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
1595; CHECK-NEXT:    ret
1596  %op = load <16 x double>, ptr %a
1597  %res = call <16 x double> @llvm.roundeven.v16f64(<16 x double> %op)
1598  store <16 x double> %res, ptr %a
1599  ret void
1600}
1601
1602define void @frintn_v32f64(ptr %a) vscale_range(16,0) #0 {
1603; CHECK-LABEL: frintn_v32f64:
1604; CHECK:       // %bb.0:
1605; CHECK-NEXT:    ptrue p0.d, vl32
1606; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1607; CHECK-NEXT:    frintn z0.d, p0/m, z0.d
1608; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
1609; CHECK-NEXT:    ret
1610  %op = load <32 x double>, ptr %a
1611  %res = call <32 x double> @llvm.roundeven.v32f64(<32 x double> %op)
1612  store <32 x double> %res, ptr %a
1613  ret void
1614}
1615
1616;
1617; TRUNC -> FRINTZ
1618;
1619
1620; Don't use SVE for 64-bit vectors.
1621define <4 x half> @frintz_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
1622; CHECK-LABEL: frintz_v4f16:
1623; CHECK:       // %bb.0:
1624; CHECK-NEXT:    frintz v0.4h, v0.4h
1625; CHECK-NEXT:    ret
1626  %res = call <4 x half> @llvm.trunc.v4f16(<4 x half> %op)
1627  ret <4 x half> %res
1628}
1629
1630; Don't use SVE for 128-bit vectors.
1631define <8 x half> @frintz_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
1632; CHECK-LABEL: frintz_v8f16:
1633; CHECK:       // %bb.0:
1634; CHECK-NEXT:    frintz v0.8h, v0.8h
1635; CHECK-NEXT:    ret
1636  %res = call <8 x half> @llvm.trunc.v8f16(<8 x half> %op)
1637  ret <8 x half> %res
1638}
1639
1640define void @frintz_v16f16(ptr %a) vscale_range(2,0) #0 {
1641; CHECK-LABEL: frintz_v16f16:
1642; CHECK:       // %bb.0:
1643; CHECK-NEXT:    ptrue p0.h, vl16
1644; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
1645; CHECK-NEXT:    frintz z0.h, p0/m, z0.h
1646; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
1647; CHECK-NEXT:    ret
1648  %op = load <16 x half>, ptr %a
1649  %res = call <16 x half> @llvm.trunc.v16f16(<16 x half> %op)
1650  store <16 x half> %res, ptr %a
1651  ret void
1652}
1653
1654define void @frintz_v32f16(ptr %a) #0 {
1655; VBITS_GE_256-LABEL: frintz_v32f16:
1656; VBITS_GE_256:       // %bb.0:
1657; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
1658; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
1659; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
1660; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
1661; VBITS_GE_256-NEXT:    frintz z0.h, p0/m, z0.h
1662; VBITS_GE_256-NEXT:    frintz z1.h, p0/m, z1.h
1663; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
1664; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
1665; VBITS_GE_256-NEXT:    ret
1666;
1667; VBITS_GE_512-LABEL: frintz_v32f16:
1668; VBITS_GE_512:       // %bb.0:
1669; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
1670; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
1671; VBITS_GE_512-NEXT:    frintz z0.h, p0/m, z0.h
1672; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
1673; VBITS_GE_512-NEXT:    ret
1674  %op = load <32 x half>, ptr %a
1675  %res = call <32 x half> @llvm.trunc.v32f16(<32 x half> %op)
1676  store <32 x half> %res, ptr %a
1677  ret void
1678}
1679
1680define void @frintz_v64f16(ptr %a) vscale_range(8,0) #0 {
1681; CHECK-LABEL: frintz_v64f16:
1682; CHECK:       // %bb.0:
1683; CHECK-NEXT:    ptrue p0.h, vl64
1684; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
1685; CHECK-NEXT:    frintz z0.h, p0/m, z0.h
1686; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
1687; CHECK-NEXT:    ret
1688  %op = load <64 x half>, ptr %a
1689  %res = call <64 x half> @llvm.trunc.v64f16(<64 x half> %op)
1690  store <64 x half> %res, ptr %a
1691  ret void
1692}
1693
1694define void @frintz_v128f16(ptr %a) vscale_range(16,0) #0 {
1695; CHECK-LABEL: frintz_v128f16:
1696; CHECK:       // %bb.0:
1697; CHECK-NEXT:    ptrue p0.h, vl128
1698; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
1699; CHECK-NEXT:    frintz z0.h, p0/m, z0.h
1700; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
1701; CHECK-NEXT:    ret
1702  %op = load <128 x half>, ptr %a
1703  %res = call <128 x half> @llvm.trunc.v128f16(<128 x half> %op)
1704  store <128 x half> %res, ptr %a
1705  ret void
1706}
1707
1708; Don't use SVE for 64-bit vectors.
1709define <2 x float> @frintz_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
1710; CHECK-LABEL: frintz_v2f32:
1711; CHECK:       // %bb.0:
1712; CHECK-NEXT:    frintz v0.2s, v0.2s
1713; CHECK-NEXT:    ret
1714  %res = call <2 x float> @llvm.trunc.v2f32(<2 x float> %op)
1715  ret <2 x float> %res
1716}
1717
1718; Don't use SVE for 128-bit vectors.
1719define <4 x float> @frintz_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
1720; CHECK-LABEL: frintz_v4f32:
1721; CHECK:       // %bb.0:
1722; CHECK-NEXT:    frintz v0.4s, v0.4s
1723; CHECK-NEXT:    ret
1724  %res = call <4 x float> @llvm.trunc.v4f32(<4 x float> %op)
1725  ret <4 x float> %res
1726}
1727
1728define void @frintz_v8f32(ptr %a) vscale_range(2,0) #0 {
1729; CHECK-LABEL: frintz_v8f32:
1730; CHECK:       // %bb.0:
1731; CHECK-NEXT:    ptrue p0.s, vl8
1732; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
1733; CHECK-NEXT:    frintz z0.s, p0/m, z0.s
1734; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
1735; CHECK-NEXT:    ret
1736  %op = load <8 x float>, ptr %a
1737  %res = call <8 x float> @llvm.trunc.v8f32(<8 x float> %op)
1738  store <8 x float> %res, ptr %a
1739  ret void
1740}
1741
1742define void @frintz_v16f32(ptr %a) #0 {
1743; VBITS_GE_256-LABEL: frintz_v16f32:
1744; VBITS_GE_256:       // %bb.0:
1745; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
1746; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
1747; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
1748; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
1749; VBITS_GE_256-NEXT:    frintz z0.s, p0/m, z0.s
1750; VBITS_GE_256-NEXT:    frintz z1.s, p0/m, z1.s
1751; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
1752; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
1753; VBITS_GE_256-NEXT:    ret
1754;
1755; VBITS_GE_512-LABEL: frintz_v16f32:
1756; VBITS_GE_512:       // %bb.0:
1757; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
1758; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
1759; VBITS_GE_512-NEXT:    frintz z0.s, p0/m, z0.s
1760; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
1761; VBITS_GE_512-NEXT:    ret
1762  %op = load <16 x float>, ptr %a
1763  %res = call <16 x float> @llvm.trunc.v16f32(<16 x float> %op)
1764  store <16 x float> %res, ptr %a
1765  ret void
1766}
1767
; vscale_range(8,0) guarantees at least 1024-bit SVE registers, so the
; 1024-bit <32 x float> is handled with a single predicated FRINTZ.
define void @frintz_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintz_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintz z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x float>, ptr %a
  %res = call <32 x float> @llvm.trunc.v32f32(<32 x float> %op)
  store <32 x float> %res, ptr %a
  ret void
}
1781
; vscale_range(16,0) guarantees at least 2048-bit SVE registers, so the
; 2048-bit <64 x float> is handled with a single predicated FRINTZ.
define void @frintz_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintz_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintz z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x float>, ptr %a
  %res = call <64 x float> @llvm.trunc.v64f32(<64 x float> %op)
  store <64 x float> %res, ptr %a
  ret void
}
1795
; Don't use SVE for 64-bit vectors.
; A single-element double vector lowers to the scalar FRINTZ instruction.
define <1 x double> @frintz_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintz_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintz d0, d0
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.trunc.v1f64(<1 x double> %op)
  ret <1 x double> %res
}
1805
; Don't use SVE for 128-bit vectors.
; A NEON-sized vector keeps the unpredicated NEON FRINTZ form.
define <2 x double> @frintz_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintz_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintz v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %op)
  ret <2 x double> %res
}
1815
; vscale_range(2,0) guarantees at least 256-bit SVE registers, so the whole
; <4 x double> fits in one register: trunc lowers to a single predicated FRINTZ.
define void @frintz_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintz_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintz z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <4 x double>, ptr %a
  %res = call <4 x double> @llvm.trunc.v4f64(<4 x double> %op)
  store <4 x double> %res, ptr %a
  ret void
}
1829
; No vscale_range: the 512-bit <8 x double> is split into two halves when only
; 256-bit registers are guaranteed, but handled in one register at VBITS >= 512.
define void @frintz_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintz_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintz z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    frintz z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintz_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintz z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <8 x double>, ptr %a
  %res = call <8 x double> @llvm.trunc.v8f64(<8 x double> %op)
  store <8 x double> %res, ptr %a
  ret void
}
1855
; vscale_range(8,0) guarantees at least 1024-bit SVE registers, so the
; 1024-bit <16 x double> is handled with a single predicated FRINTZ.
define void @frintz_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintz_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintz z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x double>, ptr %a
  %res = call <16 x double> @llvm.trunc.v16f64(<16 x double> %op)
  store <16 x double> %res, ptr %a
  ret void
}
1869
; vscale_range(16,0) guarantees at least 2048-bit SVE registers, so the
; 2048-bit <32 x double> is handled with a single predicated FRINTZ.
define void @frintz_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintz_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintz z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x double>, ptr %a
  %res = call <32 x double> @llvm.trunc.v32f64(<32 x double> %op)
  store <32 x double> %res, ptr %a
  ret void
}
1883
1884attributes #0 = { "target-features"="+sve" }
1885
1886declare <4 x half> @llvm.ceil.v4f16(<4 x half>)
1887declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
1888declare <16 x half> @llvm.ceil.v16f16(<16 x half>)
1889declare <32 x half> @llvm.ceil.v32f16(<32 x half>)
1890declare <64 x half> @llvm.ceil.v64f16(<64 x half>)
1891declare <128 x half> @llvm.ceil.v128f16(<128 x half>)
1892declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
1893declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
1894declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
1895declare <16 x float> @llvm.ceil.v16f32(<16 x float>)
1896declare <32 x float> @llvm.ceil.v32f32(<32 x float>)
1897declare <64 x float> @llvm.ceil.v64f32(<64 x float>)
1898declare <1 x double> @llvm.ceil.v1f64(<1 x double>)
1899declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
1900declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
1901declare <8 x double> @llvm.ceil.v8f64(<8 x double>)
1902declare <16 x double> @llvm.ceil.v16f64(<16 x double>)
1903declare <32 x double> @llvm.ceil.v32f64(<32 x double>)
1904
1905declare <4 x half> @llvm.floor.v4f16(<4 x half>)
1906declare <8 x half> @llvm.floor.v8f16(<8 x half>)
1907declare <16 x half> @llvm.floor.v16f16(<16 x half>)
1908declare <32 x half> @llvm.floor.v32f16(<32 x half>)
1909declare <64 x half> @llvm.floor.v64f16(<64 x half>)
1910declare <128 x half> @llvm.floor.v128f16(<128 x half>)
1911declare <2 x float> @llvm.floor.v2f32(<2 x float>)
1912declare <4 x float> @llvm.floor.v4f32(<4 x float>)
1913declare <8 x float> @llvm.floor.v8f32(<8 x float>)
1914declare <16 x float> @llvm.floor.v16f32(<16 x float>)
1915declare <32 x float> @llvm.floor.v32f32(<32 x float>)
1916declare <64 x float> @llvm.floor.v64f32(<64 x float>)
1917declare <1 x double> @llvm.floor.v1f64(<1 x double>)
1918declare <2 x double> @llvm.floor.v2f64(<2 x double>)
1919declare <4 x double> @llvm.floor.v4f64(<4 x double>)
1920declare <8 x double> @llvm.floor.v8f64(<8 x double>)
1921declare <16 x double> @llvm.floor.v16f64(<16 x double>)
1922declare <32 x double> @llvm.floor.v32f64(<32 x double>)
1923
1924declare <4 x half> @llvm.nearbyint.v4f16(<4 x half>)
1925declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
1926declare <16 x half> @llvm.nearbyint.v16f16(<16 x half>)
1927declare <32 x half> @llvm.nearbyint.v32f16(<32 x half>)
1928declare <64 x half> @llvm.nearbyint.v64f16(<64 x half>)
1929declare <128 x half> @llvm.nearbyint.v128f16(<128 x half>)
1930declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>)
1931declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
1932declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>)
1933declare <16 x float> @llvm.nearbyint.v16f32(<16 x float>)
1934declare <32 x float> @llvm.nearbyint.v32f32(<32 x float>)
1935declare <64 x float> @llvm.nearbyint.v64f32(<64 x float>)
1936declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>)
1937declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
1938declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>)
1939declare <8 x double> @llvm.nearbyint.v8f64(<8 x double>)
1940declare <16 x double> @llvm.nearbyint.v16f64(<16 x double>)
1941declare <32 x double> @llvm.nearbyint.v32f64(<32 x double>)
1942
1943declare <4 x half> @llvm.rint.v4f16(<4 x half>)
1944declare <8 x half> @llvm.rint.v8f16(<8 x half>)
1945declare <16 x half> @llvm.rint.v16f16(<16 x half>)
1946declare <32 x half> @llvm.rint.v32f16(<32 x half>)
1947declare <64 x half> @llvm.rint.v64f16(<64 x half>)
1948declare <128 x half> @llvm.rint.v128f16(<128 x half>)
1949declare <2 x float> @llvm.rint.v2f32(<2 x float>)
1950declare <4 x float> @llvm.rint.v4f32(<4 x float>)
1951declare <8 x float> @llvm.rint.v8f32(<8 x float>)
1952declare <16 x float> @llvm.rint.v16f32(<16 x float>)
1953declare <32 x float> @llvm.rint.v32f32(<32 x float>)
1954declare <64 x float> @llvm.rint.v64f32(<64 x float>)
1955declare <1 x double> @llvm.rint.v1f64(<1 x double>)
1956declare <2 x double> @llvm.rint.v2f64(<2 x double>)
1957declare <4 x double> @llvm.rint.v4f64(<4 x double>)
1958declare <8 x double> @llvm.rint.v8f64(<8 x double>)
1959declare <16 x double> @llvm.rint.v16f64(<16 x double>)
1960declare <32 x double> @llvm.rint.v32f64(<32 x double>)
1961
1962declare <4 x half> @llvm.round.v4f16(<4 x half>)
1963declare <8 x half> @llvm.round.v8f16(<8 x half>)
1964declare <16 x half> @llvm.round.v16f16(<16 x half>)
1965declare <32 x half> @llvm.round.v32f16(<32 x half>)
1966declare <64 x half> @llvm.round.v64f16(<64 x half>)
1967declare <128 x half> @llvm.round.v128f16(<128 x half>)
1968declare <2 x float> @llvm.round.v2f32(<2 x float>)
1969declare <4 x float> @llvm.round.v4f32(<4 x float>)
1970declare <8 x float> @llvm.round.v8f32(<8 x float>)
1971declare <16 x float> @llvm.round.v16f32(<16 x float>)
1972declare <32 x float> @llvm.round.v32f32(<32 x float>)
1973declare <64 x float> @llvm.round.v64f32(<64 x float>)
1974declare <1 x double> @llvm.round.v1f64(<1 x double>)
1975declare <2 x double> @llvm.round.v2f64(<2 x double>)
1976declare <4 x double> @llvm.round.v4f64(<4 x double>)
1977declare <8 x double> @llvm.round.v8f64(<8 x double>)
1978declare <16 x double> @llvm.round.v16f64(<16 x double>)
1979declare <32 x double> @llvm.round.v32f64(<32 x double>)
1980
1981declare <4 x half> @llvm.roundeven.v4f16(<4 x half>)
1982declare <8 x half> @llvm.roundeven.v8f16(<8 x half>)
1983declare <16 x half> @llvm.roundeven.v16f16(<16 x half>)
1984declare <32 x half> @llvm.roundeven.v32f16(<32 x half>)
1985declare <64 x half> @llvm.roundeven.v64f16(<64 x half>)
1986declare <128 x half> @llvm.roundeven.v128f16(<128 x half>)
1987declare <2 x float> @llvm.roundeven.v2f32(<2 x float>)
1988declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
1989declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
1990declare <16 x float> @llvm.roundeven.v16f32(<16 x float>)
1991declare <32 x float> @llvm.roundeven.v32f32(<32 x float>)
1992declare <64 x float> @llvm.roundeven.v64f32(<64 x float>)
1993declare <1 x double> @llvm.roundeven.v1f64(<1 x double>)
1994declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
1995declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
1996declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
1997declare <16 x double> @llvm.roundeven.v16f64(<16 x double>)
1998declare <32 x double> @llvm.roundeven.v32f64(<32 x double>)
1999
2000declare <4 x half> @llvm.trunc.v4f16(<4 x half>)
2001declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
2002declare <16 x half> @llvm.trunc.v16f16(<16 x half>)
2003declare <32 x half> @llvm.trunc.v32f16(<32 x half>)
2004declare <64 x half> @llvm.trunc.v64f16(<64 x half>)
2005declare <128 x half> @llvm.trunc.v128f16(<128 x half>)
2006declare <2 x float> @llvm.trunc.v2f32(<2 x float>)
2007declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
2008declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
2009declare <16 x float> @llvm.trunc.v16f32(<16 x float>)
2010declare <32 x float> @llvm.trunc.v32f32(<32 x float>)
2011declare <64 x float> @llvm.trunc.v64f32(<64 x float>)
2012declare <1 x double> @llvm.trunc.v1f64(<1 x double>)
2013declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
2014declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
2015declare <8 x double> @llvm.trunc.v8f64(<8 x double>)
2016declare <16 x double> @llvm.trunc.v16f64(<16 x double>)
2017declare <32 x double> @llvm.trunc.v32f64(<32 x double>)
2018