xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,GENERIC
3; RUN: llc < %s -O0 -fast-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,FAST
4; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \
5; RUN:          -mtriple=arm64-eabi -aarch64-neon-syntax=apple \
6; RUN:          | FileCheck %s --check-prefixes=GISEL,FALLBACK
7
; Widens a whole <2 x float> vector to <2 x double> with fpext.
; Every llc run in this file is expected to lower it to a single fcvtl.
; The two negative remark patterns guard against a GlobalISel fallback remark
; that mentions the extension for this function.
8; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_f64_f32)
9; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_f64_f32)
10define <2 x double> @test_vcvt_f64_f32(<2 x float> %x) nounwind readnone ssp {
11; CHECK-LABEL: test_vcvt_f64_f32:
12; CHECK:       // %bb.0:
13; CHECK-NEXT:    fcvtl v0.2d, v0.2s
14; CHECK-NEXT:    ret
15;
16; GISEL-LABEL: test_vcvt_f64_f32:
17; GISEL:       // %bb.0:
18; GISEL-NEXT:    fcvtl v0.2d, v0.2s
19; GISEL-NEXT:    ret
20  %vcvt1.i = fpext <2 x float> %x to <2 x double>
21  ret <2 x double> %vcvt1.i
22}
23
; Widens the upper two lanes (indices 2 and 3, picked by the shufflevector) of
; a <4 x float> to <2 x double>.
; The extract-high plus fpext pair is expected to fold into a single fcvtl2
; that reads the full v0.4s register.
24; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_high_f64_f32)
25; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_high_f64_f32)
26define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %x) nounwind readnone ssp {
27; CHECK-LABEL: test_vcvt_high_f64_f32:
28; CHECK:       // %bb.0:
29; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
30; CHECK-NEXT:    ret
31;
32; GISEL-LABEL: test_vcvt_high_f64_f32:
33; GISEL:       // %bb.0:
34; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
35; GISEL-NEXT:    ret
36  %cvt_in = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 2, i32 3>
37  %vcvt1.i = fpext <2 x float> %cvt_in to <2 x double>
38  ret <2 x double> %vcvt1.i
39}
40
; Same high-half widening, but the upper 64 bits are extracted through a
; different element type: the input is bitcast to <2 x double>, element 1 is
; taken as <1 x double>, and the result is bitcast back to <2 x float> before
; the fpext. The bitcasts are expected to be looked through so that a single
; fcvtl2 is still emitted.
41define <2 x double> @test_vcvt_high_v1f64_f32_bitcast(<4 x float> %x) nounwind readnone ssp {
42; CHECK-LABEL: test_vcvt_high_v1f64_f32_bitcast:
43; CHECK:       // %bb.0:
44; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
45; CHECK-NEXT:    ret
46;
47; GISEL-LABEL: test_vcvt_high_v1f64_f32_bitcast:
48; GISEL:       // %bb.0:
49; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
50; GISEL-NEXT:    ret
51  %bc1 = bitcast <4 x float> %x to <2 x double>
52  %ext = shufflevector <2 x double> %bc1, <2 x double> undef, <1 x i32> <i32 1>
53  %bc2 = bitcast <1 x double> %ext to <2 x float>
54  %r = fpext <2 x float> %bc2 to <2 x double>
55  ret <2 x double> %r
56}
57
; High-half float-to-double widening where the source arrives as <2 x i64>:
; element 1 is extracted as <1 x i64>, bitcast to <2 x float>, then extended.
; Expected lowering is a single fcvtl2.
58define <2 x double> @test_vcvt_high_v1i64_f32_bitcast(<2 x i64> %x) nounwind readnone ssp {
59; CHECK-LABEL: test_vcvt_high_v1i64_f32_bitcast:
60; CHECK:       // %bb.0:
61; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
62; CHECK-NEXT:    ret
63;
64; GISEL-LABEL: test_vcvt_high_v1i64_f32_bitcast:
65; GISEL:       // %bb.0:
66; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
67; GISEL-NEXT:    ret
68  %ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1>
69  %bc2 = bitcast <1 x i64> %ext to <2 x float>
70  %r = fpext <2 x float> %bc2 to <2 x double>
71  ret <2 x double> %r
72}
73
; High-half float-to-double widening where the source arrives as <4 x i32>:
; lanes 2 and 3 are extracted as <2 x i32>, bitcast to <2 x float>, then
; extended. Expected lowering is a single fcvtl2.
74define <2 x double> @test_vcvt_high_v2i32_f32_bitcast(<4 x i32> %x) nounwind readnone ssp {
75; CHECK-LABEL: test_vcvt_high_v2i32_f32_bitcast:
76; CHECK:       // %bb.0:
77; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
78; CHECK-NEXT:    ret
79;
80; GISEL-LABEL: test_vcvt_high_v2i32_f32_bitcast:
81; GISEL:       // %bb.0:
82; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
83; GISEL-NEXT:    ret
84  %ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
85  %bc2 = bitcast <2 x i32> %ext to <2 x float>
86  %r = fpext <2 x float> %bc2 to <2 x double>
87  ret <2 x double> %r
88}
89
; High-half float-to-double widening where the source arrives as <8 x i16>:
; lanes 4 through 7 are extracted as <4 x i16>, bitcast to <2 x float>, then
; extended. Expected lowering is a single fcvtl2.
90define <2 x double> @test_vcvt_high_v4i16_f32_bitcast(<8 x i16> %x) nounwind readnone ssp {
91; CHECK-LABEL: test_vcvt_high_v4i16_f32_bitcast:
92; CHECK:       // %bb.0:
93; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
94; CHECK-NEXT:    ret
95;
96; GISEL-LABEL: test_vcvt_high_v4i16_f32_bitcast:
97; GISEL:       // %bb.0:
98; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
99; GISEL-NEXT:    ret
100  %ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
101  %bc2 = bitcast <4 x i16> %ext to <2 x float>
102  %r = fpext <2 x float> %bc2 to <2 x double>
103  ret <2 x double> %r
104}
105
; High-half float-to-double widening where the source arrives as <16 x i8>:
; lanes 8 through 15 are extracted as <8 x i8>, bitcast to <2 x float>, then
; extended. Expected lowering is a single fcvtl2.
106define <2 x double> @test_vcvt_high_v8i8_f32_bitcast(<16 x i8> %x) nounwind readnone ssp {
107; CHECK-LABEL: test_vcvt_high_v8i8_f32_bitcast:
108; CHECK:       // %bb.0:
109; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
110; CHECK-NEXT:    ret
111;
112; GISEL-LABEL: test_vcvt_high_v8i8_f32_bitcast:
113; GISEL:       // %bb.0:
114; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
115; GISEL-NEXT:    ret
116  %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
117  %bc2 = bitcast <8 x i8> %ext to <2 x float>
118  %r = fpext <2 x float> %bc2 to <2 x double>
119  ret <2 x double> %r
120}
121
; Half-to-float variant of the high-half bitcast tests: element 1 of a
; <2 x i64> is extracted, bitcast to <4 x half>, and extended to <4 x float>.
; Expected lowering is a single fcvtl2 from v0.8h to v0.4s.
122define <4 x float> @test_vcvt_high_v1i64_f16_bitcast(<2 x i64> %x) nounwind readnone ssp {
123; CHECK-LABEL: test_vcvt_high_v1i64_f16_bitcast:
124; CHECK:       // %bb.0:
125; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
126; CHECK-NEXT:    ret
127;
128; GISEL-LABEL: test_vcvt_high_v1i64_f16_bitcast:
129; GISEL:       // %bb.0:
130; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
131; GISEL-NEXT:    ret
132  %ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1>
133  %bc2 = bitcast <1 x i64> %ext to <4 x half>
134  %r = fpext <4 x half> %bc2 to <4 x float>
135  ret <4 x float> %r
136}
137
; Half-to-float high-half widening where the source arrives as <4 x i32>:
; lanes 2 and 3 are extracted, bitcast to <4 x half>, and extended to
; <4 x float>. Expected lowering is a single fcvtl2 from v0.8h to v0.4s.
138define <4 x float> @test_vcvt_high_v2i32_f16_bitcast(<4 x i32> %x) nounwind readnone ssp {
139; CHECK-LABEL: test_vcvt_high_v2i32_f16_bitcast:
140; CHECK:       // %bb.0:
141; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
142; CHECK-NEXT:    ret
143;
144; GISEL-LABEL: test_vcvt_high_v2i32_f16_bitcast:
145; GISEL:       // %bb.0:
146; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
147; GISEL-NEXT:    ret
148  %ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
149  %bc2 = bitcast <2 x i32> %ext to <4 x half>
150  %r = fpext <4 x half> %bc2 to <4 x float>
151  ret <4 x float> %r
152}
153
; Half-to-float high-half widening where the source arrives as <8 x i16>:
; lanes 4 through 7 are extracted, bitcast to <4 x half>, and extended to
; <4 x float>. Expected lowering is a single fcvtl2 from v0.8h to v0.4s.
154define <4 x float> @test_vcvt_high_v4i16_f16_bitcast(<8 x i16> %x) nounwind readnone ssp {
155; CHECK-LABEL: test_vcvt_high_v4i16_f16_bitcast:
156; CHECK:       // %bb.0:
157; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
158; CHECK-NEXT:    ret
159;
160; GISEL-LABEL: test_vcvt_high_v4i16_f16_bitcast:
161; GISEL:       // %bb.0:
162; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
163; GISEL-NEXT:    ret
164  %ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
165  %bc2 = bitcast <4 x i16> %ext to <4 x half>
166  %r = fpext <4 x half> %bc2 to <4 x float>
167  ret <4 x float> %r
168}
169
; Half-to-float high-half widening where the source arrives as <16 x i8>:
; lanes 8 through 15 are extracted, bitcast to <4 x half>, and extended to
; <4 x float>. Expected lowering is a single fcvtl2 from v0.8h to v0.4s.
170define <4 x float> @test_vcvt_high_v8i8_f16_bitcast(<16 x i8> %x) nounwind readnone ssp {
171; CHECK-LABEL: test_vcvt_high_v8i8_f16_bitcast:
172; CHECK:       // %bb.0:
173; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
174; CHECK-NEXT:    ret
175;
176; GISEL-LABEL: test_vcvt_high_v8i8_f16_bitcast:
177; GISEL:       // %bb.0:
178; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
179; GISEL-NEXT:    ret
180  %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
181  %bc2 = bitcast <8 x i8> %ext to <4 x half>
182  %r = fpext <4 x half> %bc2 to <4 x float>
183  ret <4 x float> %r
184}
185
; Narrows a whole <2 x double> vector to <2 x float> with fptrunc.
; Every llc run in this file is expected to lower it to a single fcvtn.
; The negative remark patterns name the truncation opcode (G_FPTRUNC in generic
; MIR, fptrunc in IR) because that is the operation this function performs; a
; pattern naming the extension opcode could never match a remark for it.
; NOTE(review): the GlobalISel RUN line pipes only llc's standard output into
; FileCheck - confirm the missed-optimization remarks actually reach it.
186; FALLBACK-NOT: remark{{.*}}G_FPTRUNC{{.*}}(in function: test_vcvt_f32_f64)
187; FALLBACK-NOT: remark{{.*}}fptrunc{{.*}}(in function: test_vcvt_f32_f64)
188define <2 x float> @test_vcvt_f32_f64(<2 x double> %v) nounwind readnone ssp {
189; CHECK-LABEL: test_vcvt_f32_f64:
190; CHECK:       // %bb.0:
191; CHECK-NEXT:    fcvtn v0.2s, v0.2d
192; CHECK-NEXT:    ret
193;
194; GISEL-LABEL: test_vcvt_f32_f64:
195; GISEL:       // %bb.0:
196; GISEL-NEXT:    fcvtn v0.2s, v0.2d
197; GISEL-NEXT:    ret
198  %vcvt1.i = fptrunc <2 x double> %v to <2 x float>
199  ret <2 x float> %vcvt1.i
200}
201
; Narrows <2 x double> to <2 x bfloat> with fptrunc.
; All runs expect an fcvtxn to 32-bit lanes followed by an integer vector
; sequence and a final shrn by 16; the fast-isel run differs only in register
; assignment, instruction order, and its use of bsl instead of bit.
; The GlobalISel expectations are instruction-for-instruction identical to the
; default SelectionDAG ones.
; NOTE(review): the negative remark patterns below name G_FPEXT / fpext although
; this function performs fptrunc - this looks copied from the fpext tests;
; confirm which opcode (if any) is meant to be guarded here.
202; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_bf16_f64)
203; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_bf16_f64)
204define <2 x bfloat> @test_vcvt_bf16_f64(<2 x double> %v) nounwind readnone ssp {
205; GENERIC-LABEL: test_vcvt_bf16_f64:
206; GENERIC:       // %bb.0:
207; GENERIC-NEXT:    fcvtxn v0.2s, v0.2d
208; GENERIC-NEXT:    movi.4s v1, #1
209; GENERIC-NEXT:    movi.4s v2, #127, msl #8
210; GENERIC-NEXT:    ushr.4s v3, v0, #16
211; GENERIC-NEXT:    add.4s v2, v0, v2
212; GENERIC-NEXT:    and.16b v1, v3, v1
213; GENERIC-NEXT:    fcmeq.4s v3, v0, v0
214; GENERIC-NEXT:    orr.4s v0, #64, lsl #16
215; GENERIC-NEXT:    add.4s v1, v1, v2
216; GENERIC-NEXT:    bit.16b v0, v1, v3
217; GENERIC-NEXT:    shrn.4h v0, v0, #16
218; GENERIC-NEXT:    ret
219;
220; FAST-LABEL: test_vcvt_bf16_f64:
221; FAST:       // %bb.0:
222; FAST-NEXT:    fcvtxn v1.2s, v0.2d
223; FAST-NEXT:    // implicit-def: $q0
224; FAST-NEXT:    fmov d0, d1
225; FAST-NEXT:    ushr.4s v1, v0, #16
226; FAST-NEXT:    movi.4s v2, #1
227; FAST-NEXT:    and.16b v1, v1, v2
228; FAST-NEXT:    add.4s v1, v1, v0
229; FAST-NEXT:    movi.4s v2, #127, msl #8
230; FAST-NEXT:    add.4s v1, v1, v2
231; FAST-NEXT:    mov.16b v2, v0
232; FAST-NEXT:    orr.4s v2, #64, lsl #16
233; FAST-NEXT:    fcmeq.4s v0, v0, v0
234; FAST-NEXT:    bsl.16b v0, v1, v2
235; FAST-NEXT:    shrn.4h v0, v0, #16
236; FAST-NEXT:    ret
237;
238; GISEL-LABEL: test_vcvt_bf16_f64:
239; GISEL:       // %bb.0:
240; GISEL-NEXT:    fcvtxn v0.2s, v0.2d
241; GISEL-NEXT:    movi.4s v1, #1
242; GISEL-NEXT:    movi.4s v2, #127, msl #8
243; GISEL-NEXT:    ushr.4s v3, v0, #16
244; GISEL-NEXT:    add.4s v2, v0, v2
245; GISEL-NEXT:    and.16b v1, v3, v1
246; GISEL-NEXT:    fcmeq.4s v3, v0, v0
247; GISEL-NEXT:    orr.4s v0, #64, lsl #16
248; GISEL-NEXT:    add.4s v1, v1, v2
249; GISEL-NEXT:    bit.16b v0, v1, v3
250; GISEL-NEXT:    shrn.4h v0, v0, #16
251; GISEL-NEXT:    ret
252  %vcvt1.i = fptrunc <2 x double> %v to <2 x bfloat>
253  ret <2 x bfloat> %vcvt1.i
254}
255
; Truncates a <1 x float> to <1 x half> and returns element 0 as a scalar half.
; Every run expects the scalar form fcvt h0, s0; the runs differ only in the
; register copies and kill annotations that surround it.
256define half @test_vcvt_f16_f32(<1 x float> %x) {
257; GENERIC-LABEL: test_vcvt_f16_f32:
258; GENERIC:       // %bb.0:
259; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
260; GENERIC-NEXT:    fcvt h0, s0
261; GENERIC-NEXT:    ret
262;
263; FAST-LABEL: test_vcvt_f16_f32:
264; FAST:       // %bb.0:
265; FAST-NEXT:    fmov d1, d0
266; FAST-NEXT:    // implicit-def: $q0
267; FAST-NEXT:    fmov d0, d1
268; FAST-NEXT:    // kill: def $s0 killed $s0 killed $q0
269; FAST-NEXT:    fcvt h0, s0
270; FAST-NEXT:    ret
271;
272; GISEL-LABEL: test_vcvt_f16_f32:
273; GISEL:       // %bb.0:
274; GISEL-NEXT:    fcvt h0, s0
275; GISEL-NEXT:    ret
276  %tmp = fptrunc <1 x float> %x to <1 x half>
277  %elt = extractelement <1 x half> %tmp, i32 0
278  ret half %elt
279}
280
; Narrows <2 x double> %v to <2 x float> and concatenates the result after %x
; (shuffle mask 0,1,2,3), producing a <4 x float> whose upper half is the
; truncated value. Every run expects the fptrunc plus concatenation to fold into
; a single fcvtn2 that writes into the register already holding %x.
; The negative remark patterns name the truncation opcode (G_FPTRUNC in generic
; MIR, fptrunc in IR) because that is the operation this function performs; a
; pattern naming the extension opcode could never match a remark for it.
; NOTE(review): the GlobalISel RUN line pipes only llc's standard output into
; FileCheck - confirm the missed-optimization remarks actually reach it.
281; FALLBACK-NOT: remark{{.*}}G_FPTRUNC{{.*}}(in function: test_vcvt_high_f32_f64)
282; FALLBACK-NOT: remark{{.*}}fptrunc{{.*}}(in function: test_vcvt_high_f32_f64)
283define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
284; GENERIC-LABEL: test_vcvt_high_f32_f64:
285; GENERIC:       // %bb.0:
286; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
287; GENERIC-NEXT:    fcvtn2 v0.4s, v1.2d
288; GENERIC-NEXT:    ret
289;
290; FAST-LABEL: test_vcvt_high_f32_f64:
291; FAST:       // %bb.0:
292; FAST-NEXT:    fmov d2, d0
293; FAST-NEXT:    // implicit-def: $q0
294; FAST-NEXT:    fmov d0, d2
295; FAST-NEXT:    fcvtn2 v0.4s, v1.2d
296; FAST-NEXT:    ret
297;
298; GISEL-LABEL: test_vcvt_high_f32_f64:
299; GISEL:       // %bb.0:
300; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
301; GISEL-NEXT:    fcvtn2 v0.4s, v1.2d
302; GISEL-NEXT:    ret
303  %cvt = fptrunc <2 x double> %v to <2 x float>
304  %vcvt2.i = shufflevector <2 x float> %x, <2 x float> %cvt, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
305  ret <4 x float> %vcvt2.i
306}
307
; Calls the @llvm.aarch64.neon.fcvtxn.v2f32.v2f64 intrinsic on a <2 x double>
; and returns its <2 x float> result.
; Every run expects the call to select to a single fcvtxn instruction.
308define <2 x float> @test_vcvtx_f32_f64(<2 x double> %v) nounwind readnone ssp {
309; CHECK-LABEL: test_vcvtx_f32_f64:
310; CHECK:       // %bb.0:
311; CHECK-NEXT:    fcvtxn v0.2s, v0.2d
312; CHECK-NEXT:    ret
313;
314; GISEL-LABEL: test_vcvtx_f32_f64:
315; GISEL:       // %bb.0:
316; GISEL-NEXT:    fcvtxn v0.2s, v0.2d
317; GISEL-NEXT:    ret
318  %vcvtx1.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind
319  ret <2 x float> %vcvtx1.i
320}
321
; Calls the fcvtxn intrinsic on %v and concatenates its <2 x float> result
; after %x (shuffle mask 0,1,2,3) to form a <4 x float>.
; Every run expects the call plus concatenation to fold into a single fcvtxn2
; that writes into the register already holding %x; the runs differ only in
; the copies and kill annotations that widen d0 to q0 beforehand.
322define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
323; GENERIC-LABEL: test_vcvtx_high_f32_f64:
324; GENERIC:       // %bb.0:
325; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
326; GENERIC-NEXT:    fcvtxn2 v0.4s, v1.2d
327; GENERIC-NEXT:    ret
328;
329; FAST-LABEL: test_vcvtx_high_f32_f64:
330; FAST:       // %bb.0:
331; FAST-NEXT:    fmov d2, d0
332; FAST-NEXT:    // implicit-def: $q0
333; FAST-NEXT:    fmov d0, d2
334; FAST-NEXT:    fcvtxn2 v0.4s, v1.2d
335; FAST-NEXT:    ret
336;
337; GISEL-LABEL: test_vcvtx_high_f32_f64:
338; GISEL:       // %bb.0:
339; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
340; GISEL-NEXT:    fcvtxn2 v0.4s, v1.2d
341; GISEL-NEXT:    ret
342  %vcvtx2.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind
343  %res = shufflevector <2 x float> %x, <2 x float> %vcvtx2.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
344  ret <4 x float> %res
345}
346
347
; Declarations of NEON conversion intrinsics.
; Only @llvm.aarch64.neon.fcvtxn.v2f32.v2f64 is called by the functions visible
; in this file (the two vcvtx tests); the other four declarations have no
; visible caller.
; NOTE(review): the four unreferenced declarations look like leftovers from an
; older intrinsic-based form of these tests - confirm before removing them.
348declare <2 x double> @llvm.aarch64.neon.vcvthighfp2df(<4 x float>) nounwind readnone
349declare <2 x double> @llvm.aarch64.neon.vcvtfp2df(<2 x float>) nounwind readnone
350
351declare <2 x float> @llvm.aarch64.neon.vcvtdf2fp(<2 x double>) nounwind readnone
352declare <4 x float> @llvm.aarch64.neon.vcvthighdf2fp(<2 x float>, <2 x double>) nounwind readnone
353
354declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone
355
; Converts a scalar float to half through @llvm.convert.to.fp16.f32, which
; returns the result as an i16.
; Every run expects an fcvt from an s register to an h register followed by an
; fmov into w0; the fast-isel run adds extra register copies around the fmov.
356define i16 @to_half(float %in) {
357; GENERIC-LABEL: to_half:
358; GENERIC:       // %bb.0:
359; GENERIC-NEXT:    fcvt h0, s0
360; GENERIC-NEXT:    fmov w0, s0
361; GENERIC-NEXT:    ret
362;
363; FAST-LABEL: to_half:
364; FAST:       // %bb.0:
365; FAST-NEXT:    fcvt h1, s0
366; FAST-NEXT:    // implicit-def: $w0
367; FAST-NEXT:    fmov s0, w0
368; FAST-NEXT:    fmov s0, s1
369; FAST-NEXT:    fmov w0, s0
370; FAST-NEXT:    // kill: def $w1 killed $w0
371; FAST-NEXT:    ret
372;
373; GISEL-LABEL: to_half:
374; GISEL:       // %bb.0:
375; GISEL-NEXT:    fcvt h0, s0
376; GISEL-NEXT:    fmov w0, s0
377; GISEL-NEXT:    ret
378  %res = call i16 @llvm.convert.to.fp16.f32(float %in)
379  ret i16 %res
380}
381
; Converts an i16 holding a half value to float through
; @llvm.convert.from.fp16.f32.
; Every run expects an fmov of w0 into s0 followed by fcvt s0, h0; the
; fast-isel run additionally prints a kill annotation between the two.
382define float @from_half(i16 %in) {
383; GENERIC-LABEL: from_half:
384; GENERIC:       // %bb.0:
385; GENERIC-NEXT:    fmov s0, w0
386; GENERIC-NEXT:    fcvt s0, h0
387; GENERIC-NEXT:    ret
388;
389; FAST-LABEL: from_half:
390; FAST:       // %bb.0:
391; FAST-NEXT:    fmov s0, w0
392; FAST-NEXT:    // kill: def $h0 killed $h0 killed $s0
393; FAST-NEXT:    fcvt s0, h0
394; FAST-NEXT:    ret
395;
396; GISEL-LABEL: from_half:
397; GISEL:       // %bb.0:
398; GISEL-NEXT:    fmov s0, w0
399; GISEL-NEXT:    fcvt s0, h0
400; GISEL-NEXT:    ret
401  %res = call float @llvm.convert.from.fp16.f32(i16 %in)
402  ret float %res
403}
404
; Declarations of the scalar fp16 conversion intrinsics called by from_half and
; to_half.
; NOTE(review): both reference attribute group #1, which is not defined in the
; visible part of this file - confirm the reference is intentional.
405declare float @llvm.convert.from.fp16.f32(i16) #1
406declare i16 @llvm.convert.to.fp16.f32(float) #1
407;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
408; FALLBACK: {{.*}}
409