; xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"

;
; FCVTZU H -> H
;

; Don't use SVE for 64-bit vectors.
define <4 x i16> @fcvtzu_v4f16_v4i16(<4 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f16_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzu v0.4h, v0.4h
; CHECK-NEXT:    ret
  %res = fptoui <4 x half> %op1 to <4 x i16>
  ret <4 x i16> %res
}

; Don't use SVE for 128-bit vectors.
define void @fcvtzu_v8f16_v8i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f16_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    fcvtzu v0.8h, v0.8h
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x half>, ptr %a
  %res = fptoui <8 x half> %op1 to <8 x i16>
  store <8 x i16> %res, ptr %b
  ret void
}

define void @fcvtzu_v16f16_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v16f16_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %res = fptoui <16 x half> %op1 to <16 x i16>
  store <16 x i16> %res, ptr %b
  ret void
}

define void @fcvtzu_v32f16_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v32f16_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzu z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    fcvtzu z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v32f16_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzu z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %res = fptoui <32 x half> %op1 to <32 x i16>
  store <32 x i16> %res, ptr %b
  ret void
}

define void @fcvtzu_v64f16_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v64f16_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %res = fptoui <64 x half> %op1 to <64 x i16>
  store <64 x i16> %res, ptr %b
  ret void
}

define void @fcvtzu_v128f16_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v128f16_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <128 x half>, ptr %a
  %res = fptoui <128 x half> %op1 to <128 x i16>
  store <128 x i16> %res, ptr %b
  ret void
}

;
; FCVTZU H -> S
;

; Don't use SVE for 64-bit vectors.
define <2 x i32> @fcvtzu_v2f16_v2i32(<2 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f16_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl v0.4s, v0.4h
; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %res = fptoui <2 x half> %op1 to <2 x i32>
  ret <2 x i32> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @fcvtzu_v4f16_v4i32(<4 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f16_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl v0.4s, v0.4h
; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = fptoui <4 x half> %op1 to <4 x i32>
  ret <4 x i32> %res
}

define void @fcvtzu_v8f16_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f16_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x half>, ptr %a
  %res = fptoui <8 x half> %op1 to <8 x i32>
  store <8 x i32> %res, ptr %b
  ret void
}

define void @fcvtzu_v16f16_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v16f16_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    uunpklo z1.s, z0.h
; VBITS_GE_256-NEXT:    ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT:    uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT:    fcvtzu z1.s, p0/m, z1.h
; VBITS_GE_256-NEXT:    fcvtzu z0.s, p0/m, z0.h
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v16f16_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1h { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzu z0.s, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %res = fptoui <16 x half> %op1 to <16 x i32>
  store <16 x i32> %res, ptr %b
  ret void
}

define void @fcvtzu_v32f16_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v32f16_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %res = fptoui <32 x half> %op1 to <32 x i32>
  store <32 x i32> %res, ptr %b
  ret void
}

define void @fcvtzu_v64f16_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v64f16_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %res = fptoui <64 x half> %op1 to <64 x i32>
  store <64 x i32> %res, ptr %b
  ret void
}

;
; FCVTZU H -> D
;

; Don't use SVE for 64-bit vectors.
define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f16_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzu x8, h0
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %res = fptoui <1 x half> %op1 to <1 x i64>
  ret <1 x i64> %res
}

; v2f16 is not legal for NEON, so use SVE
define <2 x i64> @fcvtzu_v2f16_v2i64(<2 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f16_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = fptoui <2 x half> %op1 to <2 x i64>
  ret <2 x i64> %res
}

define void @fcvtzu_v4f16_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f16_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x half>, ptr %a
  %res = fptoui <4 x half> %op1 to <4 x i64>
  store <4 x i64> %res, ptr %b
  ret void
}

define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f16_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ldr q0, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT:    uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT:    uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT:    uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT:    uunpklo z1.d, z1.s
; VBITS_GE_256-NEXT:    fcvtzu z0.d, p0/m, z0.h
; VBITS_GE_256-NEXT:    fcvtzu z1.d, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f16_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ldr q0, [x0]
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    uunpklo z0.s, z0.h
; VBITS_GE_512-NEXT:    uunpklo z0.d, z0.s
; VBITS_GE_512-NEXT:    fcvtzu z0.d, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x half>, ptr %a
  %res = fptoui <8 x half> %op1 to <8 x i64>
  store <8 x i64> %res, ptr %b
  ret void
}

define void @fcvtzu_v16f16_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f16_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %res = fptoui <16 x half> %op1 to <16 x i64>
  store <16 x i64> %res, ptr %b
  ret void
}

define void @fcvtzu_v32f16_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f16_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %res = fptoui <32 x half> %op1 to <32 x i64>
  store <32 x i64> %res, ptr %b
  ret void
}

;
; FCVTZU S -> H
;

; Don't use SVE for 64-bit vectors.
define <2 x i16> @fcvtzu_v2f32_v2i16(<2 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f32_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = fptoui <2 x float> %op1 to <2 x i16>
  ret <2 x i16> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x i16> @fcvtzu_v4f32_v4i16(<4 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f32_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzu v1.4s, v0.4s
; CHECK-NEXT:    mov w8, v1.s[1]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov w9, v1.s[2]
; CHECK-NEXT:    mov v0.h[1], w8
; CHECK-NEXT:    mov w8, v1.s[3]
; CHECK-NEXT:    mov v0.h[2], w9
; CHECK-NEXT:    mov v0.h[3], w8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %res = fptoui <4 x float> %op1 to <4 x i16>
  ret <4 x i16> %res
}

define <8 x i16> @fcvtzu_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f32_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %res = fptoui <8 x float> %op1 to <8 x i16>
  ret <8 x i16> %res
}

define void @fcvtzu_v16f32_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v16f32_v16i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    fcvtzu z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    ptrue p0.h, vl8
; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    splice z1.h, p0, z1.h, z0.h
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v16f32_v16i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1h { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %res = fptoui <16 x float> %op1 to <16 x i16>
  store <16 x i16> %res, ptr %b
  ret void
}

define void @fcvtzu_v32f32_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v32f32_v32i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT:    st1h { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %res = fptoui <32 x float> %op1 to <32 x i16>
  store <32 x i16> %res, ptr %b
  ret void
}

define void @fcvtzu_v64f32_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v64f32_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT:    st1h { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %res = fptoui <64 x float> %op1 to <64 x i16>
  store <64 x i16> %res, ptr %b
  ret void
}

;
; FCVTZU S -> S
;

; Don't use SVE for 64-bit vectors.
define <2 x i32> @fcvtzu_v2f32_v2i32(<2 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f32_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzu v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = fptoui <2 x float> %op1 to <2 x i32>
  ret <2 x i32> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @fcvtzu_v4f32_v4i32(<4 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f32_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = fptoui <4 x float> %op1 to <4 x i32>
  ret <4 x i32> %res
}

define void @fcvtzu_v8f32_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f32_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %res = fptoui <8 x float> %op1 to <8 x i32>
  store <8 x i32> %res, ptr %b
  ret void
}

define void @fcvtzu_v16f32_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v16f32_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    fcvtzu z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v16f32_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %res = fptoui <16 x float> %op1 to <16 x i32>
  store <16 x i32> %res, ptr %b
  ret void
}

define void @fcvtzu_v32f32_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v32f32_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %res = fptoui <32 x float> %op1 to <32 x i32>
  store <32 x i32> %res, ptr %b
  ret void
}

define void @fcvtzu_v64f32_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v64f32_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %res = fptoui <64 x float> %op1 to <64 x i32>
  store <64 x i32> %res, ptr %b
  ret void
}

;
; FCVTZU S -> D
;

; Don't use SVE for 64-bit vectors.
define <1 x i64> @fcvtzu_v1f32_v1i64(<1 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f32_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl v0.2d, v0.2s
; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %res = fptoui <1 x float> %op1 to <1 x i64>
  ret <1 x i64> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @fcvtzu_v2f32_v2i64(<2 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f32_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl v0.2d, v0.2s
; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = fptoui <2 x float> %op1 to <2 x i64>
  ret <2 x i64> %res
}

define void @fcvtzu_v4f32_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f32_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x float>, ptr %a
  %res = fptoui <4 x float> %op1 to <4 x i64>
  store <4 x i64> %res, ptr %b
  ret void
}

define void @fcvtzu_v8f32_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f32_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    uunpklo z1.d, z0.s
; VBITS_GE_256-NEXT:    ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT:    uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT:    fcvtzu z1.d, p0/m, z1.s
; VBITS_GE_256-NEXT:    fcvtzu z0.d, p0/m, z0.s
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f32_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1w { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzu z0.d, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %res = fptoui <8 x float> %op1 to <8 x i64>
  store <8 x i64> %res, ptr %b
  ret void
}

define void @fcvtzu_v16f32_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f32_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %res = fptoui <16 x float> %op1 to <16 x i64>
  store <16 x i64> %res, ptr %b
  ret void
}

define void @fcvtzu_v32f32_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f32_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %res = fptoui <32 x float> %op1 to <32 x i64>
  store <32 x i64> %res, ptr %b
  ret void
}


;
; FCVTZU D -> H
;

; v1f64 is preferred to be widened to v4f64, so use SVE
define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f64_v1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = fptoui <1 x double> %op1 to <1 x i16>
  ret <1 x i16> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x i16> @fcvtzu_v2f64_v2i16(<2 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f64_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    ret
  %res = fptoui <2 x double> %op1 to <2 x i16>
  ret <2 x i16> %res
}

define <4 x i16> @fcvtzu_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f64_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %res = fptoui <4 x double> %op1 to <4 x i16>
  ret <4 x i16> %res
}

define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    fcvtzu z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    uzp1 z2.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %res = fptoui <8 x double> %op1 to <8 x i16>
  ret <8 x i16> %res
}

define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f64_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    st1h { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %res = fptoui <16 x double> %op1 to <16 x i16>
  store <16 x i16> %res, ptr %b
  ret void
}

define void @fcvtzu_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f64_v32i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    st1h { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %res = fptoui <32 x double> %op1 to <32 x i16>
  store <32 x i16> %res, ptr %b
  ret void
}

;
; FCVTZU D -> S
;

; Don't use SVE for 64-bit vectors.
define <1 x i32> @fcvtzu_v1f64_v1i32(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f64_v1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    ret
  %res = fptoui <1 x double> %op1 to <1 x i32>
  ret <1 x i32> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x i32> @fcvtzu_v2f64_v2i32(<2 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f64_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    ret
  %res = fptoui <2 x double> %op1 to <2 x i32>
  ret <2 x i32> %res
}

define <4 x i32> @fcvtzu_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f64_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %res = fptoui <4 x double> %op1 to <4 x i32>
  ret <4 x i32> %res
}

define void @fcvtzu_v8f64_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    fcvtzu z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    ptrue p0.s, vl4
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    splice z1.s, p0, z1.s, z0.s
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1w { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %res = fptoui <8 x double> %op1 to <8 x i32>
  store <8 x i32> %res, ptr %b
  ret void
}

define void @fcvtzu_v16f64_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f64_v16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    st1w { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %res = fptoui <16 x double> %op1 to <16 x i32>
  store <16 x i32> %res, ptr %b
  ret void
}

define void @fcvtzu_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f64_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    st1w { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %res = fptoui <32 x double> %op1 to <32 x i32>
  store <32 x i32> %res, ptr %b
  ret void
}

;
; FCVTZU D -> D
;

; Don't use SVE for 64-bit vectors.
define <1 x i64> @fcvtzu_v1f64_v1i64(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f64_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzu x8, d0
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %res = fptoui <1 x double> %op1 to <1 x i64>
  ret <1 x i64> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @fcvtzu_v2f64_v2i64(<2 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f64_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = fptoui <2 x double> %op1 to <2 x i64>
  ret <2 x i64> %res
}

define void @fcvtzu_v4f64_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f64_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %res = fptoui <4 x double> %op1 to <4 x i64>
  store <4 x i64> %res, ptr %b
  ret void
}

define void @fcvtzu_v8f64_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    fcvtzu z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %res = fptoui <8 x double> %op1 to <8 x i64>
  store <8 x i64> %res, ptr %b
  ret void
}

define void @fcvtzu_v16f64_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f64_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %res = fptoui <16 x double> %op1 to <16 x i64>
  store <16 x i64> %res, ptr %b
  ret void
}

define void @fcvtzu_v32f64_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f64_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %res = fptoui <32 x double> %op1 to <32 x i64>
  store <32 x i64> %res, ptr %b
  ret void
}

901;
902; FCVTZS H -> H
903;
904
905; Don't use SVE for 64-bit vectors.
define <4 x i16> @fcvtzs_v4f16_v4i16(<4 x half> %op1) vscale_range(2,0) #0 {
; fptosi <4 x half> -> <4 x i16>: fits a 64-bit NEON register, so a plain
; NEON fcvtzs is expected instead of SVE.
; CHECK-LABEL: fcvtzs_v4f16_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzs v0.4h, v0.4h
; CHECK-NEXT:    ret
  %res = fptosi <4 x half> %op1 to <4 x i16>
  ret <4 x i16> %res
}
914
915; Don't use SVE for 128-bit vectors.
define void @fcvtzs_v8f16_v8i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; fptosi <8 x half> -> <8 x i16>: fits a 128-bit NEON register, so a plain
; NEON fcvtzs is expected instead of SVE.
; CHECK-LABEL: fcvtzs_v8f16_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    fcvtzs v0.8h, v0.8h
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x half>, ptr %a
  %res = fptosi <8 x half> %op1 to <8 x i16>
  store <8 x i16> %res, ptr %b
  ret void
}
928
define void @fcvtzs_v16f16_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; fptosi <16 x half> -> <16 x i16>: 256 data bits fit the guaranteed minimum
; VL, so a single vl16 predicated SVE fcvtzs is expected.
; CHECK-LABEL: fcvtzs_v16f16_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %res = fptosi <16 x half> %op1 to <16 x i16>
  store <16 x i16> %res, ptr %b
  ret void
}
942
define void @fcvtzs_v32f16_v32i16(ptr %a, ptr %b) #0 {
; fptosi <32 x half> -> <32 x i16> (512 data bits): at 256-bit VL the op is
; split into two vl16 fcvtzs ops; at >=512-bit VL a single vl32 op suffices.
; VBITS_GE_256-LABEL: fcvtzs_v32f16_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzs z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    fcvtzs z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzs_v32f16_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzs z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %res = fptosi <32 x half> %op1 to <32 x i16>
  store <32 x i16> %res, ptr %b
  ret void
}
968
define void @fcvtzs_v64f16_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; fptosi <64 x half> -> <64 x i16>: >=1024-bit VL guaranteed, so a single
; vl64 predicated fcvtzs is expected.
; CHECK-LABEL: fcvtzs_v64f16_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %res = fptosi <64 x half> %op1 to <64 x i16>
  store <64 x i16> %res, ptr %b
  ret void
}
982
define void @fcvtzs_v128f16_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; fptosi <128 x half> -> <128 x i16>: >=2048-bit VL guaranteed, so a single
; vl128 predicated fcvtzs is expected.
; CHECK-LABEL: fcvtzs_v128f16_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <128 x half>, ptr %a
  %res = fptosi <128 x half> %op1 to <128 x i16>
  store <128 x i16> %res, ptr %b
  ret void
}
996
997;
998; FCVTZS H -> S
999;
1000
1001; Don't use SVE for 64-bit vectors.
define <2 x i32> @fcvtzs_v2f16_v2i32(<2 x half> %op1) vscale_range(2,0) #0 {
; fptosi <2 x half> -> <2 x i32>: small enough for NEON — widen the halves
; with fcvtl, then convert with NEON fcvtzs.
; CHECK-LABEL: fcvtzs_v2f16_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl v0.4s, v0.4h
; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %res = fptosi <2 x half> %op1 to <2 x i32>
  ret <2 x i32> %res
}
1012
1013; Don't use SVE for 128-bit vectors.
define <4 x i32> @fcvtzs_v4f16_v4i32(<4 x half> %op1) vscale_range(2,0) #0 {
; fptosi <4 x half> -> <4 x i32>: result fits a 128-bit NEON register —
; fcvtl widens to f32, then NEON fcvtzs converts.
; CHECK-LABEL: fcvtzs_v4f16_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl v0.4s, v0.4h
; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = fptosi <4 x half> %op1 to <4 x i32>
  ret <4 x i32> %res
}
1023
define void @fcvtzs_v8f16_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; fptosi <8 x half> -> <8 x i32>: widening convert — unpack halves into the
; low .s lanes, then one predicated fcvtzs .s <- .h over vl8.
; CHECK-LABEL: fcvtzs_v8f16_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.h
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x half>, ptr %a
  %res = fptosi <8 x half> %op1 to <8 x i32>
  store <8 x i32> %res, ptr %b
  ret void
}
1038
define void @fcvtzs_v16f16_v16i32(ptr %a, ptr %b) #0 {
; fptosi <16 x half> -> <16 x i32> (512-bit result): at 256-bit VL the vector
; is split (ext + uunpklo) into two vl8 converts; at >=512-bit VL one
; extending ld1h plus a single fcvtzs suffices.
; VBITS_GE_256-LABEL: fcvtzs_v16f16_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    uunpklo z1.s, z0.h
; VBITS_GE_256-NEXT:    ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT:    uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT:    fcvtzs z1.s, p0/m, z1.h
; VBITS_GE_256-NEXT:    fcvtzs z0.s, p0/m, z0.h
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzs_v16f16_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1h { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzs z0.s, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %res = fptosi <16 x half> %op1 to <16 x i32>
  store <16 x i32> %res, ptr %b
  ret void
}
1067
define void @fcvtzs_v32f16_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; fptosi <32 x half> -> <32 x i32>: extending ld1h into .s lanes, then a
; single vl32 fcvtzs .s <- .h.
; CHECK-LABEL: fcvtzs_v32f16_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.h
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %res = fptosi <32 x half> %op1 to <32 x i32>
  store <32 x i32> %res, ptr %b
  ret void
}
1081
define void @fcvtzs_v64f16_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; fptosi <64 x half> -> <64 x i32>: extending ld1h into .s lanes, then a
; single vl64 fcvtzs .s <- .h.
; CHECK-LABEL: fcvtzs_v64f16_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.h
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %res = fptosi <64 x half> %op1 to <64 x i32>
  store <64 x i32> %res, ptr %b
  ret void
}
1095
1096;
1097; FCVTZS H -> D
1098;
1099
1100; Don't use SVE for 64-bit vectors.
define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) vscale_range(2,0) #0 {
; fptosi <1 x half> -> <1 x i64>: scalar path — fcvtzs to a GPR, then fmov
; back into d0.
; CHECK-LABEL: fcvtzs_v1f16_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzs x8, h0
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %res = fptosi <1 x half> %op1 to <1 x i64>
  ret <1 x i64> %res
}
1110
1111; v2f16 is not legal for NEON, so use SVE
define <2 x i64> @fcvtzs_v2f16_v2i64(<2 x half> %op1) vscale_range(2,0) #0 {
; fptosi <2 x half> -> <2 x i64>: v2f16 has no NEON lowering, so unpack to
; .d lanes and use a predicated SVE fcvtzs .d <- .h.
; CHECK-LABEL: fcvtzs_v2f16_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = fptosi <2 x half> %op1 to <2 x i64>
  ret <2 x i64> %res
}
1125
define void @fcvtzs_v4f16_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; fptosi <4 x half> -> <4 x i64>: unpack .h -> .s -> .d, then one vl4
; fcvtzs .d <- .h.
; CHECK-LABEL: fcvtzs_v4f16_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.h
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x half>, ptr %a
  %res = fptosi <4 x half> %op1 to <4 x i64>
  store <4 x i64> %res, ptr %b
  ret void
}
1141
define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) #0 {
; fptosi <8 x half> -> <8 x i64> (512-bit result): at 256-bit VL the source is
; split with ext, each half unpacked and converted as vl4; at >=512-bit VL a
; single unpack chain plus one vl8 fcvtzs suffices.
; VBITS_GE_256-LABEL: fcvtzs_v8f16_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ldr q0, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT:    uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT:    uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT:    uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT:    uunpklo z1.d, z1.s
; VBITS_GE_256-NEXT:    fcvtzs z0.d, p0/m, z0.h
; VBITS_GE_256-NEXT:    fcvtzs z1.d, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzs_v8f16_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ldr q0, [x0]
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    uunpklo z0.s, z0.h
; VBITS_GE_512-NEXT:    uunpklo z0.d, z0.s
; VBITS_GE_512-NEXT:    fcvtzs z0.d, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x half>, ptr %a
  %res = fptosi <8 x half> %op1 to <8 x i64>
  store <8 x i64> %res, ptr %b
  ret void
}
1173
define void @fcvtzs_v16f16_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; fptosi <16 x half> -> <16 x i64>: extending ld1h into .d lanes, then a
; single vl16 fcvtzs .d <- .h.
; CHECK-LABEL: fcvtzs_v16f16_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.h
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %res = fptosi <16 x half> %op1 to <16 x i64>
  store <16 x i64> %res, ptr %b
  ret void
}
1187
define void @fcvtzs_v32f16_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; fptosi <32 x half> -> <32 x i64>: extending ld1h into .d lanes, then a
; single vl32 fcvtzs .d <- .h.
; CHECK-LABEL: fcvtzs_v32f16_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.h
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %res = fptosi <32 x half> %op1 to <32 x i64>
  store <32 x i64> %res, ptr %b
  ret void
}
1201
1202;
1203; FCVTZS S -> H
1204;
1205
1206; Don't use SVE for 64-bit vectors.
define <2 x i16> @fcvtzs_v2f32_v2i16(<2 x float> %op1) vscale_range(2,0) #0 {
; fptosi <2 x float> -> <2 x i16>: i16 lanes live in .s elements of a 64-bit
; NEON register, so a single NEON fcvtzs suffices.
; CHECK-LABEL: fcvtzs_v2f32_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = fptosi <2 x float> %op1 to <2 x i16>
  ret <2 x i16> %res
}
1215
1216; Don't use SVE for 128-bit vectors.
define <4 x i16> @fcvtzs_v4f32_v4i16(<4 x float> %op1) vscale_range(2,0) #0 {
; fptosi <4 x float> -> <4 x i16>: NEON fcvtzs then lane-by-lane narrowing of
; the i32 results into the i16 destination.
; CHECK-LABEL: fcvtzs_v4f32_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzs v1.4s, v0.4s
; CHECK-NEXT:    mov w8, v1.s[1]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov w9, v1.s[2]
; CHECK-NEXT:    mov v0.h[1], w8
; CHECK-NEXT:    mov w8, v1.s[3]
; CHECK-NEXT:    mov v0.h[2], w9
; CHECK-NEXT:    mov v0.h[3], w8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %res = fptosi <4 x float> %op1 to <4 x i16>
  ret <4 x i16> %res
}
1233
define <8 x i16> @fcvtzs_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 {
; fptosi <8 x float> -> <8 x i16>: SVE fcvtzs over vl8, then uzp1 narrows the
; .s results into .h lanes of the returned q register.
; CHECK-LABEL: fcvtzs_v8f32_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %res = fptosi <8 x float> %op1 to <8 x i16>
  ret <8 x i16> %res
}
1247
define void @fcvtzs_v16f32_v16i16(ptr %a, ptr %b) #0 {
; fptosi <16 x float> -> <16 x i16>: at 256-bit VL two vl8 converts are
; narrowed with uzp1 and joined by splice; at >=512-bit VL one fcvtzs plus a
; truncating st1h suffices.
; VBITS_GE_256-LABEL: fcvtzs_v16f32_v16i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzs z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    fcvtzs z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    ptrue p0.h, vl8
; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    splice z1.h, p0, z1.h, z0.h
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzs_v16f32_v16i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzs z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1h { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %res = fptosi <16 x float> %op1 to <16 x i16>
  store <16 x i16> %res, ptr %b
  ret void
}
1277
define void @fcvtzs_v32f32_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; fptosi <32 x float> -> <32 x i16>: one vl32 fcvtzs, truncated on store via
; st1h of .s elements.
; CHECK-LABEL: fcvtzs_v32f32_v32i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT:    st1h { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %res = fptosi <32 x float> %op1 to <32 x i16>
  store <32 x i16> %res, ptr %b
  ret void
}
1291
define void @fcvtzs_v64f32_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; fptosi <64 x float> -> <64 x i16>: one vl64 fcvtzs, truncated on store via
; st1h of .s elements.
; CHECK-LABEL: fcvtzs_v64f32_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT:    st1h { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %res = fptosi <64 x float> %op1 to <64 x i16>
  store <64 x i16> %res, ptr %b
  ret void
}
1305
1306;
1307; FCVTZS S -> S
1308;
1309
1310; Don't use SVE for 64-bit vectors.
define <2 x i32> @fcvtzs_v2f32_v2i32(<2 x float> %op1) vscale_range(2,0) #0 {
; fptosi <2 x float> -> <2 x i32>: fits a 64-bit NEON register, so plain
; NEON fcvtzs is expected.
; CHECK-LABEL: fcvtzs_v2f32_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = fptosi <2 x float> %op1 to <2 x i32>
  ret <2 x i32> %res
}
1319
1320; Don't use SVE for 128-bit vectors.
define <4 x i32> @fcvtzs_v4f32_v4i32(<4 x float> %op1) vscale_range(2,0) #0 {
; fptosi <4 x float> -> <4 x i32>: fits a 128-bit NEON register, so plain
; NEON fcvtzs is expected.
; CHECK-LABEL: fcvtzs_v4f32_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = fptosi <4 x float> %op1 to <4 x i32>
  ret <4 x i32> %res
}
1329
define void @fcvtzs_v8f32_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; fptosi <8 x float> -> <8 x i32>: a single vl8 predicated SVE fcvtzs.
; CHECK-LABEL: fcvtzs_v8f32_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %res = fptosi <8 x float> %op1 to <8 x i32>
  store <8 x i32> %res, ptr %b
  ret void
}
1343
define void @fcvtzs_v16f32_v16i32(ptr %a, ptr %b) #0 {
; fptosi <16 x float> -> <16 x i32> (512 data bits): at 256-bit VL the op is
; split into two vl8 fcvtzs ops; at >=512-bit VL a single vl16 op suffices.
; VBITS_GE_256-LABEL: fcvtzs_v16f32_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzs z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    fcvtzs z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzs_v16f32_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzs z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %res = fptosi <16 x float> %op1 to <16 x i32>
  store <16 x i32> %res, ptr %b
  ret void
}
1369
define void @fcvtzs_v32f32_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; fptosi <32 x float> -> <32 x i32>: a single vl32 predicated SVE fcvtzs.
; CHECK-LABEL: fcvtzs_v32f32_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %res = fptosi <32 x float> %op1 to <32 x i32>
  store <32 x i32> %res, ptr %b
  ret void
}
1383
define void @fcvtzs_v64f32_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; fptosi <64 x float> -> <64 x i32>: a single vl64 predicated SVE fcvtzs.
; CHECK-LABEL: fcvtzs_v64f32_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %res = fptosi <64 x float> %op1 to <64 x i32>
  store <64 x i32> %res, ptr %b
  ret void
}
1397
1398;
1399; FCVTZS S -> D
1400;
1401
1402; Don't use SVE for 64-bit vectors.
define <1 x i64> @fcvtzs_v1f32_v1i64(<1 x float> %op1) vscale_range(2,0) #0 {
; fptosi <1 x float> -> <1 x i64>: NEON path — fcvtl widens to f64, then
; fcvtzs converts; only the low d lane is used.
; CHECK-LABEL: fcvtzs_v1f32_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl v0.2d, v0.2s
; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %res = fptosi <1 x float> %op1 to <1 x i64>
  ret <1 x i64> %res
}
1413
1414; Don't use SVE for 128-bit vectors.
define <2 x i64> @fcvtzs_v2f32_v2i64(<2 x float> %op1) vscale_range(2,0) #0 {
; fptosi <2 x float> -> <2 x i64>: NEON path — fcvtl widens to f64, then
; fcvtzs converts in a 128-bit register.
; CHECK-LABEL: fcvtzs_v2f32_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl v0.2d, v0.2s
; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = fptosi <2 x float> %op1 to <2 x i64>
  ret <2 x i64> %res
}
1424
define void @fcvtzs_v4f32_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; fptosi <4 x float> -> <4 x i64>: unpack .s -> .d, then one vl4
; fcvtzs .d <- .s.
; CHECK-LABEL: fcvtzs_v4f32_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.s
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x float>, ptr %a
  %res = fptosi <4 x float> %op1 to <4 x i64>
  store <4 x i64> %res, ptr %b
  ret void
}
1439
define void @fcvtzs_v8f32_v8i64(ptr %a, ptr %b) #0 {
; fptosi <8 x float> -> <8 x i64> (512-bit result): at 256-bit VL the source
; is split (ext + uunpklo) into two vl4 converts; at >=512-bit VL an extending
; ld1w plus one fcvtzs suffices.
; VBITS_GE_256-LABEL: fcvtzs_v8f32_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    uunpklo z1.d, z0.s
; VBITS_GE_256-NEXT:    ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT:    uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT:    fcvtzs z1.d, p0/m, z1.s
; VBITS_GE_256-NEXT:    fcvtzs z0.d, p0/m, z0.s
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzs_v8f32_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1w { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzs z0.d, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %res = fptosi <8 x float> %op1 to <8 x i64>
  store <8 x i64> %res, ptr %b
  ret void
}
1468
define void @fcvtzs_v16f32_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; fptosi <16 x float> -> <16 x i64>: extending ld1w into .d lanes, then a
; single vl16 fcvtzs .d <- .s.
; CHECK-LABEL: fcvtzs_v16f32_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.s
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %res = fptosi <16 x float> %op1 to <16 x i64>
  store <16 x i64> %res, ptr %b
  ret void
}
1482
define void @fcvtzs_v32f32_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; fptosi <32 x float> -> <32 x i64>: extending ld1w into .d lanes, then a
; single vl32 fcvtzs .d <- .s.
; CHECK-LABEL: fcvtzs_v32f32_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.s
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %res = fptosi <32 x float> %op1 to <32 x i64>
  store <32 x i64> %res, ptr %b
  ret void
}
1496
1497
1498;
1499; FCVTZS D -> H
1500;
1501
; v1f64 is preferred to be widened to v4f64, so use SVE
define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
; fptosi <1 x double> -> <1 x i16>: SVE convert over vl4, then two uzp1 steps
; narrow .d results down to .h lanes.
; CHECK-LABEL: fcvtzs_v1f64_v1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = fptosi <1 x double> %op1 to <1 x i16>
  ret <1 x i16> %res
}
1516
1517; Don't use SVE for 128-bit vectors.
define <2 x i16> @fcvtzs_v2f64_v2i16(<2 x double> %op1) vscale_range(2,0) #0 {
; fptosi <2 x double> -> <2 x i16>: NEON fcvtzs, then xtn narrows the i64
; results to the i16-in-.s representation.
; CHECK-LABEL: fcvtzs_v2f64_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    ret
  %res = fptosi <2 x double> %op1 to <2 x i16>
  ret <2 x i16> %res
}
1527
define <4 x i16> @fcvtzs_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 {
; fptosi <4 x double> -> <4 x i16>: one vl4 fcvtzs, then two uzp1 steps
; narrow .d results to .h lanes of the returned d register.
; CHECK-LABEL: fcvtzs_v4f64_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %res = fptosi <4 x double> %op1 to <4 x i16>
  ret <4 x i16> %res
}
1542
define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) #0 {
; fptosi <8 x double> -> <8 x i16>: at 256-bit VL two vl4 converts are each
; narrowed with uzp1 and merged via a d-lane insert; at >=512-bit VL one
; fcvtzs plus two uzp1 steps suffices.
; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzs z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    fcvtzs z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    uzp1 z2.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzs z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %res = fptosi <8 x double> %op1 to <8 x i16>
  ret <8 x i16> %res
}
1573
define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; fptosi <16 x double> -> <16 x i16>: one vl16 fcvtzs, truncated on store
; via st1h of .d elements.
; CHECK-LABEL: fcvtzs_v16f64_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    st1h { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %res = fptosi <16 x double> %op1 to <16 x i16>
  store <16 x i16> %res, ptr %b
  ret void
}
1587
define void @fcvtzs_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; fptosi <32 x double> -> <32 x i16>: one vl32 fcvtzs, truncated on store
; via st1h of .d elements.
; CHECK-LABEL: fcvtzs_v32f64_v32i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    st1h { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %res = fptosi <32 x double> %op1 to <32 x i16>
  store <32 x i16> %res, ptr %b
  ret void
}
1601
1602;
1603; FCVTZS D -> S
1604;
1605
1606; Don't use SVE for 64-bit vectors.
define <1 x i32> @fcvtzs_v1f64_v1i32(<1 x double> %op1) vscale_range(2,0) #0 {
; fptosi <1 x double> -> <1 x i32>: NEON fcvtzs then xtn narrows; only the
; low .s lane is meaningful.
; CHECK-LABEL: fcvtzs_v1f64_v1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    ret
  %res = fptosi <1 x double> %op1 to <1 x i32>
  ret <1 x i32> %res
}
1617
1618; Don't use SVE for 128-bit vectors.
define <2 x i32> @fcvtzs_v2f64_v2i32(<2 x double> %op1) vscale_range(2,0) #0 {
; fptosi <2 x double> -> <2 x i32>: NEON fcvtzs then xtn narrows i64 -> i32.
; CHECK-LABEL: fcvtzs_v2f64_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    ret
  %res = fptosi <2 x double> %op1 to <2 x i32>
  ret <2 x i32> %res
}
1628
define <4 x i32> @fcvtzs_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 {
; fptosi <4 x double> -> <4 x i32>: one vl4 fcvtzs, then uzp1 narrows .d
; results into the .s lanes of the returned q register.
; CHECK-LABEL: fcvtzs_v4f64_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %res = fptosi <4 x double> %op1 to <4 x i32>
  ret <4 x i32> %res
}
1642
define void @fcvtzs_v8f64_v8i32(ptr %a, ptr %b) #0 {
; fptosi <8 x double> -> <8 x i32>: at 256-bit VL two vl4 converts are
; narrowed with uzp1 and joined by splice; at >=512-bit VL one fcvtzs plus a
; truncating st1w suffices.
; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzs z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    fcvtzs z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    ptrue p0.s, vl4
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    splice z1.s, p0, z1.s, z0.s
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzs z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1w { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %res = fptosi <8 x double> %op1 to <8 x i32>
  store <8 x i32> %res, ptr %b
  ret void
}
1672
define void @fcvtzs_v16f64_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; fptosi <16 x double> -> <16 x i32>: one vl16 fcvtzs, truncated on store
; via st1w of .d elements.
; CHECK-LABEL: fcvtzs_v16f64_v16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    st1w { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %res = fptosi <16 x double> %op1 to <16 x i32>
  store <16 x i32> %res, ptr %b
  ret void
}
1686
define void @fcvtzs_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; fptosi <32 x double> -> <32 x i32>: one vl32 fcvtzs, truncated on store
; via st1w of .d elements.
; CHECK-LABEL: fcvtzs_v32f64_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    st1w { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %res = fptosi <32 x double> %op1 to <32 x i32>
  store <32 x i32> %res, ptr %b
  ret void
}
1700
1701;
1702; FCVTZS D -> D
1703;
1704
1705; Don't use SVE for 64-bit vectors.
define <1 x i64> @fcvtzs_v1f64_v1i64(<1 x double> %op1) vscale_range(2,0) #0 {
; fptosi <1 x double> -> <1 x i64>: scalar path — fcvtzs to a GPR, then fmov
; back into d0.
; CHECK-LABEL: fcvtzs_v1f64_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzs x8, d0
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %res = fptosi <1 x double> %op1 to <1 x i64>
  ret <1 x i64> %res
}
1715
1716; Don't use SVE for 128-bit vectors.
define <2 x i64> @fcvtzs_v2f64_v2i64(<2 x double> %op1) vscale_range(2,0) #0 {
; fptosi <2 x double> -> <2 x i64>: fits a 128-bit NEON register, so plain
; NEON fcvtzs is expected.
; CHECK-LABEL: fcvtzs_v2f64_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = fptosi <2 x double> %op1 to <2 x i64>
  ret <2 x i64> %res
}
1725
define void @fcvtzs_v4f64_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; fptosi <4 x double> -> <4 x i64>: a single vl4 predicated SVE fcvtzs.
; CHECK-LABEL: fcvtzs_v4f64_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %res = fptosi <4 x double> %op1 to <4 x i64>
  store <4 x i64> %res, ptr %b
  ret void
}
1739
define void @fcvtzs_v8f64_v8i64(ptr %a, ptr %b) #0 {
; fptosi <8 x double> -> <8 x i64> (512 data bits): at 256-bit VL the op is
; split into two vl4 fcvtzs ops; at >=512-bit VL a single vl8 op suffices.
; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzs z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    fcvtzs z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzs z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %res = fptosi <8 x double> %op1 to <8 x i64>
  store <8 x i64> %res, ptr %b
  ret void
}
1765
define void @fcvtzs_v16f64_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; fptosi <16 x double> -> <16 x i64>: a single vl16 predicated SVE fcvtzs.
; CHECK-LABEL: fcvtzs_v16f64_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %res = fptosi <16 x double> %op1 to <16 x i64>
  store <16 x i64> %res, ptr %b
  ret void
}
1779
define void @fcvtzs_v32f64_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; fptosi <32 x double> -> <32 x i64>: a single vl32 predicated SVE fcvtzs.
; CHECK-LABEL: fcvtzs_v32f64_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %res = fptosi <32 x double> %op1 to <32 x i64>
  store <32 x i64> %res, ptr %b
  ret void
}
1793
1794attributes #0 = { "target-features"="+sve" }
1795