; xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; FP_EXTEND

; fpext nxv8f16 -> nxv8f32: illegal result type is split in two (unpack halves, convert each).
define <vscale x 8 x float> @fcvts_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fcvts_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.s, z0.h
; CHECK-NEXT:    uunpkhi z2.s, z0.h
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    fcvt z0.s, p0/m, z1.h
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    fcvt z1.s, p0/m, z2.h
; CHECK-NEXT:    ret
  %res = fpext <vscale x 8 x half> %a to <vscale x 8 x float>
  ret <vscale x 8 x float> %res
}

; fpext nxv4f16 -> nxv4f64: result is split in two.
define <vscale x 4 x double> @fcvtd_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fcvtd_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    fcvt z0.d, p0/m, z1.h
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    fcvt z1.d, p0/m, z2.h
; CHECK-NEXT:    ret
  %res = fpext <vscale x 4 x half> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}

; fpext nxv8f16 -> nxv8f64: result is split in four (two levels of unpacking).
define <vscale x 8 x double> @fcvtd_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fcvtd_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.s, z0.h
; CHECK-NEXT:    uunpkhi z0.s, z0.h
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uunpklo z2.d, z1.s
; CHECK-NEXT:    uunpkhi z1.d, z1.s
; CHECK-NEXT:    uunpklo z3.d, z0.s
; CHECK-NEXT:    uunpkhi z4.d, z0.s
; CHECK-NEXT:    fcvt z1.d, p0/m, z1.h
; CHECK-NEXT:    movprfx z0, z2
; CHECK-NEXT:    fcvt z0.d, p0/m, z2.h
; CHECK-NEXT:    movprfx z2, z3
; CHECK-NEXT:    fcvt z2.d, p0/m, z3.h
; CHECK-NEXT:    movprfx z3, z4
; CHECK-NEXT:    fcvt z3.d, p0/m, z4.h
; CHECK-NEXT:    ret
  %res = fpext <vscale x 8 x half> %a to <vscale x 8 x double>
  ret <vscale x 8 x double> %res
}

; fpext nxv4f32 -> nxv4f64: result is split in two.
define <vscale x 4 x double> @fcvtd_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtd_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    fcvt z0.d, p0/m, z1.s
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    fcvt z1.d, p0/m, z2.s
; CHECK-NEXT:    ret
  %res = fpext <vscale x 4 x float> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}

; fpext nxv8f32 -> nxv8f64: both operand and result are split (four conversions).
define <vscale x 8 x double> @fcvtd_nxv8f32(<vscale x 8 x float> %a) {
; CHECK-LABEL: fcvtd_nxv8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z2.d, z0.s
; CHECK-NEXT:    uunpkhi z3.d, z0.s
; CHECK-NEXT:    uunpklo z4.d, z1.s
; CHECK-NEXT:    uunpkhi z5.d, z1.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z2
; CHECK-NEXT:    fcvt z0.d, p0/m, z2.s
; CHECK-NEXT:    movprfx z1, z3
; CHECK-NEXT:    fcvt z1.d, p0/m, z3.s
; CHECK-NEXT:    movprfx z2, z4
; CHECK-NEXT:    fcvt z2.d, p0/m, z4.s
; CHECK-NEXT:    movprfx z3, z5
; CHECK-NEXT:    fcvt z3.d, p0/m, z5.s
; CHECK-NEXT:    ret
  %res = fpext <vscale x 8 x float> %a to <vscale x 8 x double>
  ret <vscale x 8 x double> %res
}

; FP_ROUND

; fptrunc nxv8f32 -> nxv8f16: operand is split in two; results re-packed with uzp1.
define <vscale x 8 x half> @fcvth_nxv8f32(<vscale x 8 x float> %a) {
; CHECK-LABEL: fcvth_nxv8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fcvt z1.h, p0/m, z1.s
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fptrunc <vscale x 8 x float> %a to <vscale x 8 x half>
  ret <vscale x 8 x half> %res
}

; fptrunc nxv8f64 -> nxv8f16: operand is split in four; results re-packed in two uzp1 stages.
define <vscale x 8 x half> @fcvth_nxv8f64(<vscale x 8 x double> %a) {
; CHECK-LABEL: fcvth_nxv8f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvt z3.h, p0/m, z3.d
; CHECK-NEXT:    fcvt z2.h, p0/m, z2.d
; CHECK-NEXT:    fcvt z1.h, p0/m, z1.d
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT:    ret
  %res = fptrunc <vscale x 8 x double> %a to <vscale x 8 x half>
  ret <vscale x 8 x half> %res
}

; fptrunc nxv4f64 -> nxv4f16: operand is split in two.
define <vscale x 4 x half> @fcvth_nxv4f64(<vscale x 4 x double> %a) {
; CHECK-LABEL: fcvth_nxv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvt z1.h, p0/m, z1.d
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fptrunc <vscale x 4 x double> %a to <vscale x 4 x half>
  ret <vscale x 4 x half> %res
}

; fptrunc nxv4f64 -> nxv4f32: operand is split in two.
define <vscale x 4 x float> @fcvts_nxv4f64(<vscale x 4 x double> %a) {
; CHECK-LABEL: fcvts_nxv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvt z1.s, p0/m, z1.d
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fptrunc <vscale x 4 x double> %a to <vscale x 4 x float>
  ret <vscale x 4 x float> %res
}

; fptrunc nxv8f64 -> nxv8f32: operand is split in four; both result halves re-packed.
define <vscale x 8 x float> @fcvts_nxv8f64(<vscale x 8 x double> %a) {
; CHECK-LABEL: fcvts_nxv8f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvt z1.s, p0/m, z1.d
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
; CHECK-NEXT:    fcvt z3.s, p0/m, z3.d
; CHECK-NEXT:    fcvt z2.s, p0/m, z2.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z1.s, z2.s, z3.s
; CHECK-NEXT:    ret
  %res = fptrunc <vscale x 8 x double> %a to <vscale x 8 x float>
  ret <vscale x 8 x float> %res
}

; FP_TO_SINT

; Split operand
define <vscale x 4 x i32> @fcvtzs_s_nxv4f64(<vscale x 4 x double> %a) {
; CHECK-LABEL: fcvtzs_s_nxv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fptosi <vscale x 4 x double> %a to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

; fptosi nxv8f64 -> nxv8i16: operand is split in four; results narrowed via uzp1.
define <vscale x 8 x i16> @fcvtzs_h_nxv8f64(<vscale x 8 x double> %a) {
; CHECK-LABEL: fcvtzs_h_nxv8f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvtzs z3.d, p0/m, z3.d
; CHECK-NEXT:    fcvtzs z2.d, p0/m, z2.d
; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT:    ret
  %res = fptosi <vscale x 8 x double> %a to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

; Split result
define <vscale x 4 x i64> @fcvtzs_d_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtzs_d_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z1.s
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    fcvtzs z1.d, p0/m, z2.s
; CHECK-NEXT:    ret
  %res = fptosi <vscale x 4 x float> %a to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}

; fptosi nxv16f16 -> nxv16i32: result is split in four.
define <vscale x 16 x i32> @fcvtzs_s_nxv16f16(<vscale x 16 x half> %a) {
; CHECK-LABEL: fcvtzs_s_nxv16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z2.s, z0.h
; CHECK-NEXT:    uunpkhi z3.s, z0.h
; CHECK-NEXT:    uunpklo z4.s, z1.h
; CHECK-NEXT:    uunpkhi z5.s, z1.h
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movprfx z0, z2
; CHECK-NEXT:    fcvtzs z0.s, p0/m, z2.h
; CHECK-NEXT:    movprfx z1, z3
; CHECK-NEXT:    fcvtzs z1.s, p0/m, z3.h
; CHECK-NEXT:    movprfx z2, z4
; CHECK-NEXT:    fcvtzs z2.s, p0/m, z4.h
; CHECK-NEXT:    movprfx z3, z5
; CHECK-NEXT:    fcvtzs z3.s, p0/m, z5.h
; CHECK-NEXT:    ret
  %res = fptosi <vscale x 16 x half> %a to <vscale x 16 x i32>
  ret <vscale x 16 x i32> %res
}

; FP_TO_UINT

; Split operand
; NOTE(review): the autogenerated assertions record the signed fcvtzs form for
; this fptoui — this is llc's actual output; presumably legal because the
; result is truncated to 32 bits. Confirm by regenerating with
; utils/update_llc_test_checks.py rather than hand-editing.
define <vscale x 4 x i32> @fcvtzu_s_nxv4f64(<vscale x 4 x double> %a) {
; CHECK-LABEL: fcvtzu_s_nxv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fptoui <vscale x 4 x double> %a to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

; Split result
define <vscale x 4 x i64> @fcvtzu_d_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtzu_d_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z1.s
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    fcvtzu z1.d, p0/m, z2.s
; CHECK-NEXT:    ret
  %res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}

; SINT_TO_FP

; Split operand
define <vscale x 4 x float> @scvtf_s_nxv4i64(<vscale x 4 x i64> %a) {
; CHECK-LABEL: scvtf_s_nxv4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    scvtf z1.s, p0/m, z1.d
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = sitofp <vscale x 4 x i64> %a to <vscale x 4 x float>
  ret <vscale x 4 x float> %res
}

; sitofp nxv8i64 -> nxv8f16: operand is split in four.
define <vscale x 8 x half> @scvtf_h_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: scvtf_h_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    scvtf z3.h, p0/m, z3.d
; CHECK-NEXT:    scvtf z2.h, p0/m, z2.d
; CHECK-NEXT:    scvtf z1.h, p0/m, z1.d
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT:    ret
  %res = sitofp <vscale x 8 x i64> %a to <vscale x 8 x half>
  ret <vscale x 8 x half> %res
}

; Split result
define <vscale x 16 x float> @scvtf_s_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: scvtf_s_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpklo z1.h, z0.b
; CHECK-NEXT:    sunpkhi z0.h, z0.b
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sunpklo z2.s, z1.h
; CHECK-NEXT:    sunpkhi z1.s, z1.h
; CHECK-NEXT:    sunpklo z3.s, z0.h
; CHECK-NEXT:    sunpkhi z4.s, z0.h
; CHECK-NEXT:    scvtf z1.s, p0/m, z1.s
; CHECK-NEXT:    movprfx z0, z2
; CHECK-NEXT:    scvtf z0.s, p0/m, z2.s
; CHECK-NEXT:    movprfx z2, z3
; CHECK-NEXT:    scvtf z2.s, p0/m, z3.s
; CHECK-NEXT:    movprfx z3, z4
; CHECK-NEXT:    scvtf z3.s, p0/m, z4.s
; CHECK-NEXT:    ret
  %res = sitofp <vscale x 16 x i8> %a to <vscale x 16 x float>
  ret <vscale x 16 x float> %res
}

; sitofp nxv4i32 -> nxv4f64: result is split in two (signed unpack).
define <vscale x 4 x double> @scvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: scvtf_d_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpklo z1.d, z0.s
; CHECK-NEXT:    sunpkhi z2.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    scvtf z0.d, p0/m, z1.d
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    scvtf z1.d, p0/m, z2.d
; CHECK-NEXT:    ret
  %res = sitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}

; sitofp from i1: predicate is unpacked, materialized as 0/-1 vectors, then converted.
define <vscale x 4 x double> @scvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: scvtf_d_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    punpklo p2.h, p0.b
; CHECK-NEXT:    punpkhi p0.h, p0.b
; CHECK-NEXT:    mov z0.d, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    mov z1.d, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    scvtf z0.d, p1/m, z0.d
; CHECK-NEXT:    scvtf z1.d, p1/m, z1.d
; CHECK-NEXT:    ret
  %res = sitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}

; UINT_TO_FP

; Split operand
define <vscale x 4 x float> @ucvtf_s_nxv4i64(<vscale x 4 x i64> %a) {
; CHECK-LABEL: ucvtf_s_nxv4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ucvtf z1.s, p0/m, z1.d
; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = uitofp <vscale x 4 x i64> %a to <vscale x 4 x float>
  ret <vscale x 4 x float> %res
}

; uitofp nxv8i64 -> nxv8f16: operand is split in four.
define <vscale x 8 x half> @ucvtf_h_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: ucvtf_h_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ucvtf z3.h, p0/m, z3.d
; CHECK-NEXT:    ucvtf z2.h, p0/m, z2.d
; CHECK-NEXT:    ucvtf z1.h, p0/m, z1.d
; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT:    ret
  %res = uitofp <vscale x 8 x i64> %a to <vscale x 8 x half>
  ret <vscale x 8 x half> %res
}

; Split result
define <vscale x 4 x double> @ucvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: ucvtf_d_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    ucvtf z0.d, p0/m, z1.d
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    ucvtf z1.d, p0/m, z2.d
; CHECK-NEXT:    ret
  %res = uitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}

; uitofp from i1: predicate is unpacked, materialized as 0/1 vectors, then converted.
define <vscale x 4 x double> @ucvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: ucvtf_d_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    punpklo p2.h, p0.b
; CHECK-NEXT:    punpkhi p0.h, p0.b
; CHECK-NEXT:    mov z0.d, p2/z, #1 // =0x1
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    mov z1.d, p0/z, #1 // =0x1
; CHECK-NEXT:    ucvtf z0.d, p1/m, z0.d
; CHECK-NEXT:    ucvtf z1.d, p1/m, z1.d
; CHECK-NEXT:    ret
  %res = uitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}