; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"

;
; UCVTF H -> H
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @ucvtf_v4i16_v4f16(<4 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i16_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ucvtf v0.4h, v0.4h
; CHECK-NEXT:    ret
  %res = uitofp <4 x i16> %op1 to <4 x half>
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define void @ucvtf_v8i16_v8f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v8i16_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ucvtf v0.8h, v0.8h
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x i16>, ptr %a
  %res = uitofp <8 x i16> %op1 to <8 x half>
  store <8 x half> %res, ptr %b
  ret void
}

define void @ucvtf_v16i16_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v16i16_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %res = uitofp <16 x i16> %op1 to <16 x half>
  store <16 x half> %res, ptr %b
  ret void
}

define void @ucvtf_v32i16_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v32i16_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ucvtf z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    ucvtf z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: ucvtf_v32i16_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ucvtf z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %res = uitofp <32 x i16> %op1 to <32 x half>
  store <32 x half> %res, ptr %b
  ret void
}

define void @ucvtf_v64i16_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v64i16_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x i16>, ptr %a
  %res = uitofp <64 x i16> %op1 to <64 x half>
  store <64 x half> %res, ptr %b
  ret void
}

define void @ucvtf_v128i16_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v128i16_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <128 x i16>, ptr %a
  %res = uitofp <128 x i16> %op1 to <128 x half>
  store <128 x half> %res, ptr %b
  ret void
}

;
; UCVTF H -> S
;

; Don't use SVE for 64-bit vectors.
define <2 x float> @ucvtf_v2i16_v2f32(<2 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v2i16_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d1, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ucvtf v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = uitofp <2 x i16> %op1 to <2 x float>
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @ucvtf_v4i16_v4f32(<4 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i16_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = uitofp <4 x i16> %op1 to <4 x float>
  ret <4 x float> %res
}

define void @ucvtf_v8i16_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v8i16_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x i16>, ptr %a
  %res = uitofp <8 x i16> %op1 to <8 x float>
  store <8 x float> %res, ptr %b
  ret void
}

define void @ucvtf_v16i16_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v16i16_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    uunpklo z1.s, z0.h
; VBITS_GE_256-NEXT:    ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT:    uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT:    ucvtf z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    ucvtf z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: ucvtf_v16i16_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1h { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ucvtf z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %res = uitofp <16 x i16> %op1 to <16 x float>
  store <16 x float> %res, ptr %b
  ret void
}

define void @ucvtf_v32i16_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v32i16_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %res = uitofp <32 x i16> %op1 to <32 x float>
  store <32 x float> %res, ptr %b
  ret void
}

define void @ucvtf_v64i16_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v64i16_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x i16>, ptr %a
  %res = uitofp <64 x i16> %op1 to <64 x float>
  store <64 x float> %res, ptr %b
  ret void
}

;
; UCVTF H -> D
;

; v1i16 is preferred to be widened to v4i16, which pushes the output into SVE types, so use SVE
define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v1i16_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = uitofp <1 x i16> %op1 to <1 x double>
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @ucvtf_v2i16_v2f64(<2 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v2i16_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d1, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ucvtf v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = uitofp <2 x i16> %op1 to <2 x double>
  ret <2 x double> %res
}

define void @ucvtf_v4i16_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i16_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x i16>, ptr %a
  %res = uitofp <4 x i16> %op1 to <4 x double>
  store <4 x double> %res, ptr %b
  ret void
}

define void @ucvtf_v8i16_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v8i16_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ldr q0, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT:    uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT:    uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT:    uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT:    uunpklo z1.d, z1.s
; VBITS_GE_256-NEXT:    ucvtf z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    ucvtf z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: ucvtf_v8i16_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ldr q0, [x0]
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    uunpklo z0.s, z0.h
; VBITS_GE_512-NEXT:    uunpklo z0.d, z0.s
; VBITS_GE_512-NEXT:    ucvtf z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i16>, ptr %a
  %res = uitofp <8 x i16> %op1 to <8 x double>
  store <8 x double> %res, ptr %b
  ret void
}

define void @ucvtf_v16i16_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v16i16_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %res = uitofp <16 x i16> %op1 to <16 x double>
  store <16 x double> %res, ptr %b
  ret void
}

define void @ucvtf_v32i16_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v32i16_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %res = uitofp <32 x i16> %op1 to <32 x double>
  store <32 x double> %res, ptr %b
  ret void
}

;
; UCVTF S -> H
;

; Don't use SVE for 64-bit vectors.
define <2 x half> @ucvtf_v2i32_v2f16(<2 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v2i32_v2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-NEXT:    fcvtn v0.4h, v0.4s
; CHECK-NEXT:    ret
  %res = uitofp <2 x i32> %op1 to <2 x half>
  ret <2 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x half> @ucvtf_v4i32_v4f16(<4 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i32_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-NEXT:    fcvtn v0.4h, v0.4s
; CHECK-NEXT:    ret
  %res = uitofp <4 x i32> %op1 to <4 x half>
  ret <4 x half> %res
}

define <8 x half> @ucvtf_v8i32_v8f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v8i32_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %res = uitofp <8 x i32> %op1 to <8 x half>
  ret <8 x half> %res
}

define void @ucvtf_v16i32_v16f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v16i32_v16f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ucvtf z0.h, p0/m, z0.s
; VBITS_GE_256-NEXT:    ucvtf z1.h, p0/m, z1.s
; VBITS_GE_256-NEXT:    ptrue p0.h, vl8
; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    splice z1.h, p0, z1.h, z0.h
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: ucvtf_v16i32_v16f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ucvtf z0.h, p0/m, z0.s
; VBITS_GE_512-NEXT:    ptrue p0.h, vl16
; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %res = uitofp <16 x i32> %op1 to <16 x half>
  store <16 x half> %res, ptr %b
  ret void
}

define void @ucvtf_v32i32_v32f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v32i32_v32f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.s
; CHECK-NEXT:    ptrue p0.h, vl32
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %res = uitofp <32 x i32> %op1 to <32 x half>
  store <32 x half> %res, ptr %b
  ret void
}

define void @ucvtf_v64i32_v64f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v64i32_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.s
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x i32>, ptr %a
  %res = uitofp <64 x i32> %op1 to <64 x half>
  store <64 x half> %res, ptr %b
  ret void
}

;
; UCVTF S -> S
;

; Don't use SVE for 64-bit vectors.
define <2 x float> @ucvtf_v2i32_v2f32(<2 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v2i32_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ucvtf v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = uitofp <2 x i32> %op1 to <2 x float>
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @ucvtf_v4i32_v4f32(<4 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i32_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = uitofp <4 x i32> %op1 to <4 x float>
  ret <4 x float> %res
}

define void @ucvtf_v8i32_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v8i32_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %res = uitofp <8 x i32> %op1 to <8 x float>
  store <8 x float> %res, ptr %b
  ret void
}

define void @ucvtf_v16i32_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v16i32_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ucvtf z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    ucvtf z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: ucvtf_v16i32_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ucvtf z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %res = uitofp <16 x i32> %op1 to <16 x float>
  store <16 x float> %res, ptr %b
  ret void
}

define void @ucvtf_v32i32_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v32i32_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %res = uitofp <32 x i32> %op1 to <32 x float>
  store <32 x float> %res, ptr %b
  ret void
}

define void @ucvtf_v64i32_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v64i32_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x i32>, ptr %a
  %res = uitofp <64 x i32> %op1 to <64 x float>
  store <64 x float> %res, ptr %b
  ret void
}

;
; UCVTF S -> D
;

; Don't use SVE for 64-bit vectors.
define <1 x double> @ucvtf_v1i32_v1f64(<1 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v1i32_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ucvtf v0.2d, v0.2d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %res = uitofp <1 x i32> %op1 to <1 x double>
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @ucvtf_v2i32_v2f64(<2 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v2i32_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ucvtf v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = uitofp <2 x i32> %op1 to <2 x double>
  ret <2 x double> %res
}

define void @ucvtf_v4i32_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i32_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x i32>, ptr %a
  %res = uitofp <4 x i32> %op1 to <4 x double>
  store <4 x double> %res, ptr %b
  ret void
}

define void @ucvtf_v8i32_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v8i32_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    uunpklo z1.d, z0.s
; VBITS_GE_256-NEXT:    ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT:    uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT:    ucvtf z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    ucvtf z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: ucvtf_v8i32_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1w { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ucvtf z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %res = uitofp <8 x i32> %op1 to <8 x double>
  store <8 x double> %res, ptr %b
  ret void
}

define void @ucvtf_v16i32_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v16i32_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %res = uitofp <16 x i32> %op1 to <16 x double>
  store <16 x double> %res, ptr %b
  ret void
}

define void @ucvtf_v32i32_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v32i32_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %res = uitofp <32 x i32> %op1 to <32 x double>
  store <32 x double> %res, ptr %b
  ret void
}

;
; UCVTF D -> H
;

; Don't use SVE for 64-bit vectors.
define <1 x half> @ucvtf_v1i64_v1f16(<1 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v1i64_v1f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    ucvtf h0, x8
; CHECK-NEXT:    ret
  %res = uitofp <1 x i64> %op1 to <1 x half>
  ret <1 x half> %res
}

; v2f16 is not legal for NEON, so use SVE
define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v2i64_v2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = uitofp <2 x i64> %op1 to <2 x half>
  ret <2 x half> %res
}

define <4 x half> @ucvtf_v4i64_v4f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i64_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %res = uitofp <4 x i64> %op1 to <4 x half>
  ret <4 x half> %res
}

define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: ucvtf_v8i64_v8f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ucvtf z0.h, p0/m, z0.d
; VBITS_GE_256-NEXT:    ucvtf z1.h, p0/m, z1.d
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    uzp1 z2.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: ucvtf_v8i64_v8f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ucvtf z0.h, p0/m, z0.d
; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %res = uitofp <8 x i64> %op1 to <8 x half>
  ret <8 x half> %res
}

define void @ucvtf_v16i64_v16f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v16i64_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    st1h { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %res = uitofp <16 x i64> %op1 to <16 x half>
  store <16 x half> %res, ptr %b
  ret void
}

define void @ucvtf_v32i64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v32i64_v32f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    st1h { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %res = uitofp <32 x i64> %op1 to <32 x half>
  store <32 x half> %res, ptr %b
  ret void
}

;
; UCVTF D -> S
;

; Don't use SVE for 64-bit vectors.
define <1 x float> @ucvtf_v1i64_v1f32(<1 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v1i64_v1f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    ucvtf v0.2d, v0.2d
; CHECK-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-NEXT:    ret
  %res = uitofp <1 x i64> %op1 to <1 x float>
  ret <1 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x float> @ucvtf_v2i64_v2f32(<2 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v2i64_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ucvtf v0.2d, v0.2d
; CHECK-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-NEXT:    ret
  %res = uitofp <2 x i64> %op1 to <2 x float>
  ret <2 x float> %res
}

define <4 x float> @ucvtf_v4i64_v4f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i64_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %res = uitofp <4 x i64> %op1 to <4 x float>
  ret <4 x float> %res
}

define void @ucvtf_v8i64_v8f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v8i64_v8f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ucvtf z0.s, p0/m, z0.d
; VBITS_GE_256-NEXT:    ucvtf z1.s, p0/m, z1.d
; VBITS_GE_256-NEXT:    ptrue p0.s, vl4
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    splice z1.s, p0, z1.s, z0.s
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: ucvtf_v8i64_v8f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ucvtf z0.s, p0/m, z0.d
; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %res = uitofp <8 x i64> %op1 to <8 x float>
  store <8 x float> %res, ptr %b
  ret void
}

define void @ucvtf_v16i64_v16f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v16i64_v16f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.d
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %res = uitofp <16 x i64> %op1 to <16 x float>
  store <16 x float> %res, ptr %b
  ret void
}

define void @ucvtf_v32i64_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v32i64_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.d
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %res = uitofp <32 x i64> %op1 to <32 x float>
  store <32 x float> %res, ptr %b
  ret void
}
820
821;
822; UCVTF D -> D
823;
824
825; Don't use SVE for 64-bit vectors.
826define <1 x double> @ucvtf_v1i64_v1f64(<1 x i64> %op1) vscale_range(2,0) #0 {
827; CHECK-LABEL: ucvtf_v1i64_v1f64:
828; CHECK:       // %bb.0:
829; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
830; CHECK-NEXT:    ucvtf d0, d0
831; CHECK-NEXT:    ret
832  %res = uitofp <1 x i64> %op1 to <1 x double>
833  ret <1 x double> %res
834}
835
836; Don't use SVE for 128-bit vectors.
837define <2 x double> @ucvtf_v2i64_v2f64(<2 x i64> %op1) vscale_range(2,0) #0 {
838; CHECK-LABEL: ucvtf_v2i64_v2f64:
839; CHECK:       // %bb.0:
840; CHECK-NEXT:    ucvtf v0.2d, v0.2d
841; CHECK-NEXT:    ret
842  %res = uitofp <2 x i64> %op1 to <2 x double>
843  ret <2 x double> %res
844}
845
846define void @ucvtf_v4i64_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
847; CHECK-LABEL: ucvtf_v4i64_v4f64:
848; CHECK:       // %bb.0:
849; CHECK-NEXT:    ptrue p0.d, vl4
850; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
851; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
852; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
853; CHECK-NEXT:    ret
854  %op1 = load <4 x i64>, ptr %a
855  %res = uitofp <4 x i64> %op1 to <4 x double>
856  store <4 x double> %res, ptr %b
857  ret void
858}
859
860define void @ucvtf_v8i64_v8f64(ptr %a, ptr %b) #0 {
861; VBITS_GE_256-LABEL: ucvtf_v8i64_v8f64:
862; VBITS_GE_256:       // %bb.0:
863; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
864; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
865; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
866; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
867; VBITS_GE_256-NEXT:    ucvtf z0.d, p0/m, z0.d
868; VBITS_GE_256-NEXT:    ucvtf z1.d, p0/m, z1.d
869; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
870; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1]
871; VBITS_GE_256-NEXT:    ret
872;
873; VBITS_GE_512-LABEL: ucvtf_v8i64_v8f64:
874; VBITS_GE_512:       // %bb.0:
875; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
876; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
877; VBITS_GE_512-NEXT:    ucvtf z0.d, p0/m, z0.d
878; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
879; VBITS_GE_512-NEXT:    ret
880  %op1 = load <8 x i64>, ptr %a
881  %res = uitofp <8 x i64> %op1 to <8 x double>
882  store <8 x double> %res, ptr %b
883  ret void
884}
885
; vscale_range(8,0): 1024-bit minimum VL holds all 16 doublewords in one register.
define void @ucvtf_v16i64_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v16i64_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %res = uitofp <16 x i64> %op1 to <16 x double>
  store <16 x double> %res, ptr %b
  ret void
}
899
; vscale_range(16,0): 2048-bit minimum VL holds all 32 doublewords in one register.
define void @ucvtf_v32i64_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v32i64_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %res = uitofp <32 x i64> %op1 to <32 x double>
  store <32 x double> %res, ptr %b
  ret void
}
913
914;
915; SCVTF H -> H
916;
917
918; Don't use SVE for 64-bit vectors.
; 64-bit vector: lowered to a single NEON scvtf, no SVE expected.
define <4 x half> @scvtf_v4i16_v4f16(<4 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i16_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.4h, v0.4h
; CHECK-NEXT:    ret
  %res = sitofp <4 x i16> %op1 to <4 x half>
  ret <4 x half> %res
}
927
928; Don't use SVE for 128-bit vectors.
; 128-bit vector: NEON load/convert/store, no SVE expected.
define void @scvtf_v8i16_v8f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v8i16_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    scvtf v0.8h, v0.8h
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x i16>, ptr %a
  %res = sitofp <8 x i16> %op1 to <8 x half>
  store <8 x half> %res, ptr %b
  ret void
}
941
; <16 x i16> -> <16 x half>: fits one SVE register at the minimum VL, single predicated scvtf.
define void @scvtf_v16i16_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v16i16_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %res = sitofp <16 x i16> %op1 to <16 x half>
  store <16 x half> %res, ptr %b
  ret void
}
955
; No minimum-VL attribute: split into two halves at VBITS>=256, one convert at VBITS>=512.
define void @scvtf_v32i16_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v32i16_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    scvtf z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    scvtf z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v32i16_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    scvtf z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %res = sitofp <32 x i16> %op1 to <32 x half>
  store <32 x half> %res, ptr %b
  ret void
}
981
; vscale_range(8,0): all 64 halfwords fit one register, single predicated scvtf.
define void @scvtf_v64i16_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v64i16_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x i16>, ptr %a
  %res = sitofp <64 x i16> %op1 to <64 x half>
  store <64 x half> %res, ptr %b
  ret void
}
995
; vscale_range(16,0): all 128 halfwords fit one register, single predicated scvtf.
define void @scvtf_v128i16_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v128i16_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <128 x i16>, ptr %a
  %res = sitofp <128 x i16> %op1 to <128 x half>
  store <128 x half> %res, ptr %b
  ret void
}
1009
1010;
1011; SCVTF H -> S
1012;
1013
1014; Don't use SVE for 64-bit vectors.
; i16 lanes live in 32-bit elements; shl/sshr sign-extends them in place before the NEON convert.
define <2 x float> @scvtf_v2i16_v2f32(<2 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i16_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #16
; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
; CHECK-NEXT:    scvtf v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = sitofp <2 x i16> %op1 to <2 x float>
  ret <2 x float> %res
}
1025
1026; Don't use SVE for 128-bit vectors.
1027define <4 x float> @scvtf_v4i16_v4f32(<4 x i16> %op1) vscale_range(2,0) #0 {
1028; CHECK-LABEL: scvtf_v4i16_v4f32:
1029; CHECK:       // %bb.0:
1030; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
1031; CHECK-NEXT:    scvtf v0.4s, v0.4s
1032; CHECK-NEXT:    ret
1033  %res = sitofp <4 x i16> %op1 to <4 x float>
1034  ret <4 x float> %res
1035}
1036
; i16 source is sign-extended to .s with sunpklo before the predicated convert.
define void @scvtf_v8i16_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v8i16_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    sunpklo z0.s, z0.h
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x i16>, ptr %a
  %res = sitofp <8 x i16> %op1 to <8 x float>
  store <8 x float> %res, ptr %b
  ret void
}
1051
; No minimum-VL attribute: at VBITS>=256 the widened result needs two registers
; (low half via sunpklo, high half via ext+sunpklo); at VBITS>=512 one suffices.
define void @scvtf_v16i16_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v16i16_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    sunpklo z1.s, z0.h
; VBITS_GE_256-NEXT:    ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT:    sunpklo z0.s, z0.h
; VBITS_GE_256-NEXT:    scvtf z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    scvtf z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v16i16_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl16
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    sunpklo z0.s, z0.h
; VBITS_GE_512-NEXT:    scvtf z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %res = sitofp <16 x i16> %op1 to <16 x float>
  store <16 x float> %res, ptr %b
  ret void
}
1082
; vscale_range(8,0): widened result still fits one register; sunpklo then convert.
define void @scvtf_v32i16_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v32i16_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl32
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    sunpklo z0.s, z0.h
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %res = sitofp <32 x i16> %op1 to <32 x float>
  store <32 x float> %res, ptr %b
  ret void
}
1098
; vscale_range(16,0): widened result still fits one register; sunpklo then convert.
define void @scvtf_v64i16_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v64i16_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    sunpklo z0.s, z0.h
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x i16>, ptr %a
  %res = sitofp <64 x i16> %op1 to <64 x float>
  store <64 x float> %res, ptr %b
  ret void
}
1114
1115;
1116; SCVTF H -> D
1117;
1118
; v1i16 is preferred to be widened to v4i16, which pushes the output into SVE types, so use SVE
; Two sunpklo steps sign-extend h -> s -> d before the predicated convert.
define <1 x double> @scvtf_v1i16_v1f64(<1 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v1i16_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    sunpklo z0.s, z0.h
; CHECK-NEXT:    sunpklo z0.d, z0.s
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = sitofp <1 x i16> %op1 to <1 x double>
  ret <1 x double> %res
}
1133
1134; Don't use SVE for 128-bit vectors.
; NEON only: shl/sshr sign-extends the i16 lanes, sshll widens to 64-bit, then scvtf.
define <2 x double> @scvtf_v2i16_v2f64(<2 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i16_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #16
; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-NEXT:    scvtf v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = sitofp <2 x i16> %op1 to <2 x double>
  ret <2 x double> %res
}
1146
; Two sunpklo steps sign-extend h -> s -> d before the predicated convert.
define void @scvtf_v4i16_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i16_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    sunpklo z0.s, z0.h
; CHECK-NEXT:    sunpklo z0.d, z0.s
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x i16>, ptr %a
  %res = sitofp <4 x i16> %op1 to <4 x double>
  store <4 x double> %res, ptr %b
  ret void
}
1162
; No minimum-VL attribute: at VBITS>=256 the q-register input is split with ext,
; each half widened h->s->d and converted; at VBITS>=512 one widen-and-convert chain.
define void @scvtf_v8i16_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v8i16_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ldr q0, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT:    sunpklo z0.s, z0.h
; VBITS_GE_256-NEXT:    sunpklo z1.s, z1.h
; VBITS_GE_256-NEXT:    sunpklo z0.d, z0.s
; VBITS_GE_256-NEXT:    sunpklo z1.d, z1.s
; VBITS_GE_256-NEXT:    scvtf z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    scvtf z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v8i16_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ldr q0, [x0]
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    sunpklo z0.s, z0.h
; VBITS_GE_512-NEXT:    sunpklo z0.d, z0.s
; VBITS_GE_512-NEXT:    scvtf z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i16>, ptr %a
  %res = sitofp <8 x i16> %op1 to <8 x double>
  store <8 x double> %res, ptr %b
  ret void
}
1194
; vscale_range(8,0): single h->s->d widen chain, then one predicated convert.
define void @scvtf_v16i16_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v16i16_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    sunpklo z0.s, z0.h
; CHECK-NEXT:    sunpklo z0.d, z0.s
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %res = sitofp <16 x i16> %op1 to <16 x double>
  store <16 x double> %res, ptr %b
  ret void
}
1211
; vscale_range(16,0): single h->s->d widen chain, then one predicated convert.
define void @scvtf_v32i16_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v32i16_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl32
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    sunpklo z0.s, z0.h
; CHECK-NEXT:    sunpklo z0.d, z0.s
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %res = sitofp <32 x i16> %op1 to <32 x double>
  store <32 x double> %res, ptr %b
  ret void
}
1228
1229;
1230; SCVTF S -> H
1231;
1232
1233; Don't use SVE for 64-bit vectors.
; NEON only: convert to f32 then narrow with fcvtn; extra lanes are don't-care.
define <2 x half> @scvtf_v2i32_v2f16(<2 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i32_v2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    scvtf v0.4s, v0.4s
; CHECK-NEXT:    fcvtn v0.4h, v0.4s
; CHECK-NEXT:    ret
  %res = sitofp <2 x i32> %op1 to <2 x half>
  ret <2 x half> %res
}
1244
1245; Don't use SVE for 128-bit vectors.
; NEON only: convert to f32 then narrow with fcvtn.
define <4 x half> @scvtf_v4i32_v4f16(<4 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i32_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.4s, v0.4s
; CHECK-NEXT:    fcvtn v0.4h, v0.4s
; CHECK-NEXT:    ret
  %res = sitofp <4 x i32> %op1 to <4 x half>
  ret <4 x half> %res
}
1255
; SVE narrowing convert s->h; uzp1 packs the half results to the low 128 bits.
define <8 x half> @scvtf_v8i32_v8f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v8i32_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %res = sitofp <8 x i32> %op1 to <8 x half>
  ret <8 x half> %res
}
1269
; No minimum-VL attribute: at VBITS>=256 two halves are converted, packed with
; uzp1 and joined with splice; at VBITS>=512 one convert-and-pack suffices.
define void @scvtf_v16i32_v16f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v16i32_v16f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    scvtf z0.h, p0/m, z0.s
; VBITS_GE_256-NEXT:    scvtf z1.h, p0/m, z1.s
; VBITS_GE_256-NEXT:    ptrue p0.h, vl8
; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    splice z1.h, p0, z1.h, z0.h
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v16i32_v16f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    scvtf z0.h, p0/m, z0.s
; VBITS_GE_512-NEXT:    ptrue p0.h, vl16
; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %res = sitofp <16 x i32> %op1 to <16 x half>
  store <16 x half> %res, ptr %b
  ret void
}
1301
; vscale_range(8,0): one narrowing convert s->h, packed with uzp1 before the store.
define void @scvtf_v32i32_v32f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v32i32_v32f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
; CHECK-NEXT:    ptrue p0.h, vl32
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %res = sitofp <32 x i32> %op1 to <32 x half>
  store <32 x half> %res, ptr %b
  ret void
}
1317
; vscale_range(16,0): one narrowing convert s->h, packed with uzp1 before the store.
define void @scvtf_v64i32_v64f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v64i32_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x i32>, ptr %a
  %res = sitofp <64 x i32> %op1 to <64 x half>
  store <64 x half> %res, ptr %b
  ret void
}
1333
1334;
1335; SCVTF S -> S
1336;
1337
1338; Don't use SVE for 64-bit vectors.
; 64-bit vector: single NEON scvtf, no SVE expected.
define <2 x float> @scvtf_v2i32_v2f32(<2 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i32_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = sitofp <2 x i32> %op1 to <2 x float>
  ret <2 x float> %res
}
1347
1348; Don't use SVE for 128-bit vectors.
; 128-bit vector: single NEON scvtf, no SVE expected.
define <4 x float> @scvtf_v4i32_v4f32(<4 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i32_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = sitofp <4 x i32> %op1 to <4 x float>
  ret <4 x float> %res
}
1357
; <8 x i32> -> <8 x float>: fits one SVE register at the minimum VL, single predicated scvtf.
define void @scvtf_v8i32_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v8i32_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %res = sitofp <8 x i32> %op1 to <8 x float>
  store <8 x float> %res, ptr %b
  ret void
}
1371
; No minimum-VL attribute: split into two halves at VBITS>=256, one convert at VBITS>=512.
define void @scvtf_v16i32_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v16i32_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    scvtf z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    scvtf z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v16i32_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    scvtf z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %res = sitofp <16 x i32> %op1 to <16 x float>
  store <16 x float> %res, ptr %b
  ret void
}
1397
; vscale_range(8,0): all 32 words fit one register, single predicated scvtf.
define void @scvtf_v32i32_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v32i32_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %res = sitofp <32 x i32> %op1 to <32 x float>
  store <32 x float> %res, ptr %b
  ret void
}
1411
; vscale_range(16,0): all 64 words fit one register, single predicated scvtf.
define void @scvtf_v64i32_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v64i32_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x i32>, ptr %a
  %res = sitofp <64 x i32> %op1 to <64 x float>
  store <64 x float> %res, ptr %b
  ret void
}
1425
1426;
1427; SCVTF S -> D
1428;
1429
1430; Don't use SVE for 64-bit vectors.
; NEON only: sshll widens i32 -> i64, then scvtf; upper lane is don't-care.
define <1 x double> @scvtf_v1i32_v1f64(<1 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v1i32_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-NEXT:    scvtf v0.2d, v0.2d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %res = sitofp <1 x i32> %op1 to <1 x double>
  ret <1 x double> %res
}
1441
1442; Don't use SVE for 128-bit vectors.
; NEON only: sshll widens i32 -> i64, then scvtf.
define <2 x double> @scvtf_v2i32_v2f64(<2 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i32_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-NEXT:    scvtf v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = sitofp <2 x i32> %op1 to <2 x double>
  ret <2 x double> %res
}
1452
; i32 source is sign-extended to .d with sunpklo before the predicated convert.
define void @scvtf_v4i32_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i32_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    sunpklo z0.d, z0.s
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x i32>, ptr %a
  %res = sitofp <4 x i32> %op1 to <4 x double>
  store <4 x double> %res, ptr %b
  ret void
}
1467
; No minimum-VL attribute: at VBITS>=256 the widened result needs two registers
; (low half via sunpklo, high half via ext+sunpklo); at VBITS>=512 one suffices.
define void @scvtf_v8i32_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v8i32_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    sunpklo z1.d, z0.s
; VBITS_GE_256-NEXT:    ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT:    sunpklo z0.d, z0.s
; VBITS_GE_256-NEXT:    scvtf z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    scvtf z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v8i32_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    sunpklo z0.d, z0.s
; VBITS_GE_512-NEXT:    scvtf z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %res = sitofp <8 x i32> %op1 to <8 x double>
  store <8 x double> %res, ptr %b
  ret void
}
1498
; vscale_range(8,0): widened result fits one register; sunpklo then convert.
define void @scvtf_v16i32_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v16i32_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    sunpklo z0.d, z0.s
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %res = sitofp <16 x i32> %op1 to <16 x double>
  store <16 x double> %res, ptr %b
  ret void
}
1514
; vscale_range(16,0): widened result fits one register; sunpklo then convert.
define void @scvtf_v32i32_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v32i32_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    sunpklo z0.d, z0.s
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %res = sitofp <32 x i32> %op1 to <32 x double>
  store <32 x double> %res, ptr %b
  ret void
}
1530
1531;
1532; SCVTF D -> H
1533;
1534
1535; Don't use SVE for 64-bit vectors.
; Scalar path: the single i64 lane is moved to a GPR and converted with scalar scvtf.
define <1 x half> @scvtf_v1i64_v1f16(<1 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v1i64_v1f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    scvtf h0, x8
; CHECK-NEXT:    ret
  %res = sitofp <1 x i64> %op1 to <1 x half>
  ret <1 x half> %res
}
1546
1547; v2f16 is not legal for NEON, so use SVE
; SVE narrowing convert d->h, then two uzp1 packs (s and h) into the low lanes.
define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i64_v2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = sitofp <2 x i64> %op1 to <2 x half>
  ret <2 x half> %res
}
1561
; SVE narrowing convert d->h, then two uzp1 packs (s and h) into the low 64 bits.
define <4 x half> @scvtf_v4i64_v4f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i64_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %res = sitofp <4 x i64> %op1 to <4 x half>
  ret <4 x half> %res
}
1576
; No minimum-VL attribute: at VBITS>=256 two halves are converted/packed and the
; q-register result is assembled with a lane move; at VBITS>=512 one chain suffices.
define <8 x half> @scvtf_v8i64_v8f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: scvtf_v8i64_v8f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    scvtf z0.h, p0/m, z0.d
; VBITS_GE_256-NEXT:    scvtf z1.h, p0/m, z1.d
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    uzp1 z2.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v8i64_v8f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    scvtf z0.h, p0/m, z0.d
; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %res = sitofp <8 x i64> %op1 to <8 x half>
  ret <8 x half> %res
}
1607
; vscale_range(8,0): d->h convert, uzp1 to .s lanes, stored with a truncating st1h.
define void @scvtf_v16i64_v16f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v16i64_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    st1h { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %res = sitofp <16 x i64> %op1 to <16 x half>
  store <16 x half> %res, ptr %b
  ret void
}
1623
; vscale_range(16,0): d->h convert, uzp1 to .s lanes, stored with a truncating st1h.
define void @scvtf_v32i64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v32i64_v32f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    st1h { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %res = sitofp <32 x i64> %op1 to <32 x half>
  store <32 x half> %res, ptr %b
  ret void
}
1639
1640;
1641; SCVTF D -> S
1642;
1643
1644; Don't use SVE for 64-bit vectors.
; NEON only: convert to f64 then narrow with fcvtn; upper lane is don't-care.
define <1 x float> @scvtf_v1i64_v1f32(<1 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v1i64_v1f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    scvtf v0.2d, v0.2d
; CHECK-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-NEXT:    ret
  %res = sitofp <1 x i64> %op1 to <1 x float>
  ret <1 x float> %res
}
1655
1656; Don't use SVE for 128-bit vectors.
; NEON only: convert to f64 then narrow with fcvtn.
define <2 x float> @scvtf_v2i64_v2f32(<2 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i64_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.2d, v0.2d
; CHECK-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-NEXT:    ret
  %res = sitofp <2 x i64> %op1 to <2 x float>
  ret <2 x float> %res
}
1666
; SVE narrowing convert d->s, packed with uzp1 into the low 128 bits.
define <4 x float> @scvtf_v4i64_v4f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i64_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %res = sitofp <4 x i64> %op1 to <4 x float>
  ret <4 x float> %res
}
1680
; No minimum-VL attribute: at VBITS>=256 two halves are converted, packed with
; uzp1 and joined with splice; at VBITS>=512 one convert-and-pack suffices.
define void @scvtf_v8i64_v8f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v8i64_v8f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    scvtf z0.s, p0/m, z0.d
; VBITS_GE_256-NEXT:    scvtf z1.s, p0/m, z1.d
; VBITS_GE_256-NEXT:    ptrue p0.s, vl4
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    splice z1.s, p0, z1.s, z0.s
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v8i64_v8f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    scvtf z0.s, p0/m, z0.d
; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %res = sitofp <8 x i64> %op1 to <8 x float>
  store <8 x float> %res, ptr %b
  ret void
}
1712
; vscale_range(8,0): one narrowing convert d->s, packed with uzp1 before the store.
define void @scvtf_v16i64_v16f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v16i64_v16f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %res = sitofp <16 x i64> %op1 to <16 x float>
  store <16 x float> %res, ptr %b
  ret void
}
1728
; vscale_range(16,0): one narrowing convert d->s, packed with uzp1 before the store.
define void @scvtf_v32i64_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v32i64_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %res = sitofp <32 x i64> %op1 to <32 x float>
  store <32 x float> %res, ptr %b
  ret void
}
1744
1745;
1746; SCVTF D -> D
1747;
1748
1749; Don't use SVE for 64-bit vectors.
; Scalar path: single lane converted with scalar scvtf d0, d0.
define <1 x double> @scvtf_v1i64_v1f64(<1 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v1i64_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    scvtf d0, d0
; CHECK-NEXT:    ret
  %res = sitofp <1 x i64> %op1 to <1 x double>
  ret <1 x double> %res
}
1759
1760; Don't use SVE for 128-bit vectors.
; 128-bit vector: single NEON scvtf, no SVE expected.
define <2 x double> @scvtf_v2i64_v2f64(<2 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i64_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = sitofp <2 x i64> %op1 to <2 x double>
  ret <2 x double> %res
}
1769
; <4 x i64> -> <4 x double>: fits one SVE register at the minimum VL, single predicated scvtf.
define void @scvtf_v4i64_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i64_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %res = sitofp <4 x i64> %op1 to <4 x double>
  store <4 x double> %res, ptr %b
  ret void
}
1783
; <8 x i64> -> <8 x double> with no vscale_range: codegen depends on the
; -aarch64-sve-vector-bits-min RUN setting. At 256-bit minimum the 512-bit
; vector is split into two vl4 halves (second half addressed via x8 = #4
; element offset); at >=512-bit it is a single vl8 load/convert/store.
define void @scvtf_v8i64_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v8i64_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    scvtf z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    scvtf z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v8i64_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    scvtf z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %res = sitofp <8 x i64> %op1 to <8 x double>
  store <8 x double> %res, ptr %b
  ret void
}
1809
; <16 x i64> -> <16 x double>. vscale_range(8,0) guarantees at least
; 1024-bit SVE registers, so the 1024-bit vector is handled by a single
; vl16-predicated load, in-place scvtf, and store.
define void @scvtf_v16i64_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v16i64_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %res = sitofp <16 x i64> %op1 to <16 x double>
  store <16 x double> %res, ptr %b
  ret void
}
1823
; <32 x i64> -> <32 x double>. vscale_range(16,0) guarantees at least
; 2048-bit SVE registers, so the whole 2048-bit vector converts with a
; single vl32-predicated load, in-place scvtf, and store.
define void @scvtf_v32i64_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v32i64_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %res = sitofp <32 x i64> %op1 to <32 x double>
  store <32 x double> %res, ptr %b
  ret void
}
1837
1838attributes #0 = { "target-features"="+sve" }
1839