xref: /llvm-project/llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc < %s | FileCheck %s
3
; The RUN line passes no -mtriple, so the target is taken from this triple.
4target triple = "aarch64"
5
6; First some corner cases
; Exact arithmetic shift right by 0 on <4 x i32>, then sitofp to <4 x float>.
; The expected output is a plain scvtf with no immediate operand and no
; separate shift instruction.
7define <4 x float> @f_v4_s0(<4 x i32> %u) {
8; CHECK-LABEL: f_v4_s0:
9; CHECK:       // %bb.0:
10; CHECK-NEXT:    scvtf v0.4s, v0.4s
11; CHECK-NEXT:    ret
12  %s = ashr exact <4 x i32> %u, <i32 0, i32 0, i32 0, i32 0>
13  %v = sitofp <4 x i32> %s to <4 x float>
14  ret <4 x float> %v
15}
16
; Exact arithmetic shift right by 1 on <4 x i32>, then sitofp to <4 x float>.
; The expected output is a single scvtf carrying the shift amount as its
; immediate (#1); no separate shift instruction is expected.
17define <4 x float> @f_v4_s1(<4 x i32> %u) {
18; CHECK-LABEL: f_v4_s1:
19; CHECK:       // %bb.0:
20; CHECK-NEXT:    scvtf v0.4s, v0.4s, #1
21; CHECK-NEXT:    ret
22  %s = ashr exact <4 x i32> %u, <i32 1, i32 1, i32 1, i32 1>
23  %v = sitofp <4 x i32> %s to <4 x float>
24  ret <4 x float> %v
25}
26
; Negative test: the ashr by 24 here does NOT carry the 'exact' flag.
; The expected output keeps the shift (sshr #24) as its own instruction and
; follows it with a scvtf that has no immediate operand.
27define <4 x float> @f_v4_s24_inexact(<4 x i32> %u) {
28; CHECK-LABEL: f_v4_s24_inexact:
29; CHECK:       // %bb.0:
30; CHECK-NEXT:    sshr v0.4s, v0.4s, #24
31; CHECK-NEXT:    scvtf v0.4s, v0.4s
32; CHECK-NEXT:    ret
33  %s = ashr <4 x i32> %u, <i32 24, i32 24, i32 24, i32 24>
34  %v = sitofp <4 x i32> %s to <4 x float>
35  ret <4 x float> %v
36}
37
; Arithmetic shift right by 31 on <4 x i32> (no 'exact' flag), then sitofp.
; The expected output is a compare-less-than-zero (cmlt #0) in place of the
; shift, followed by a scvtf that has no immediate operand.
38define <4 x float> @f_v4_s31(<4 x i32> %u) {
39; CHECK-LABEL: f_v4_s31:
40; CHECK:       // %bb.0:
41; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
42; CHECK-NEXT:    scvtf v0.4s, v0.4s
43; CHECK-NEXT:    ret
44  %s = ashr <4 x i32> %u, <i32 31, i32 31, i32 31, i32 31>
45  %v = sitofp <4 x i32> %s to <4 x float>
46  ret <4 x float> %v
47}
48
49; Common cases for conversion from signed integer to floating point types
; <2 x i32> variant: exact ashr by 24 followed by sitofp to <2 x float>.
; The expected output is one scvtf on the .2s arrangement with immediate #24.
50define <2 x float> @f_v2_s24(<2 x i32> %u) {
51; CHECK-LABEL: f_v2_s24:
52; CHECK:       // %bb.0:
53; CHECK-NEXT:    scvtf v0.2s, v0.2s, #24
54; CHECK-NEXT:    ret
55  %s = ashr exact <2 x i32> %u, <i32 24, i32 24>
56  %v = sitofp <2 x i32> %s to <2 x float>
57  ret <2 x float> %v
58}
59
; <4 x i32> variant: exact ashr by 24 followed by sitofp to <4 x float>.
; The expected output is one scvtf on the .4s arrangement with immediate #24.
60define <4 x float> @f_v4_s24(<4 x i32> %u) {
61; CHECK-LABEL: f_v4_s24:
62; CHECK:       // %bb.0:
63; CHECK-NEXT:    scvtf v0.4s, v0.4s, #24
64; CHECK-NEXT:    ret
65  %s = ashr exact <4 x i32> %u, <i32 24, i32 24, i32 24, i32 24>
66  %v = sitofp <4 x i32> %s to <4 x float>
67  ret <4 x float> %v
68}
69
70; Check legalisation to <2 x f64> does not get in the way
; <8 x i64> input: exact ashr by 56 followed by sitofp to <8 x double>.
; The expected output is four scvtf instructions on the .2d arrangement
; (registers v0-v3), each with immediate #56 and no separate shift.
71define <8 x double> @d_v8_s64(<8 x i64> %u) {
72; CHECK-LABEL: d_v8_s64:
73; CHECK:       // %bb.0:
74; CHECK-NEXT:    scvtf v0.2d, v0.2d, #56
75; CHECK-NEXT:    scvtf v1.2d, v1.2d, #56
76; CHECK-NEXT:    scvtf v2.2d, v2.2d, #56
77; CHECK-NEXT:    scvtf v3.2d, v3.2d, #56
78; CHECK-NEXT:    ret
79  %s = ashr exact <8 x i64> %u, <i64 56, i64 56, i64 56, i64 56, i64 56, i64 56, i64 56, i64 56>
80  %v = sitofp <8 x i64> %s to <8 x double>
81  ret <8 x double> %v
82}
83
; Half-precision, <4 x i16> input: exact ashr by 8 followed by sitofp to
; <4 x half>. The function uses attribute group #0 (+fullfp16).
; The expected output is one scvtf on the .4h arrangement with immediate #8.
84define <4 x half> @h_v4_s8(<4 x i16> %u) #0 {
85; CHECK-LABEL: h_v4_s8:
86; CHECK:       // %bb.0:
87; CHECK-NEXT:    scvtf v0.4h, v0.4h, #8
88; CHECK-NEXT:    ret
89  %s = ashr exact <4 x i16> %u, <i16 8, i16 8, i16 8, i16 8>
90  %v = sitofp <4 x i16> %s to <4 x half>
91  ret <4 x half> %v
92}
93
; Half-precision, <8 x i16> input: exact ashr by 8 followed by sitofp to
; <8 x half>. The function uses attribute group #0 (+fullfp16).
; The expected output is one scvtf on the .8h arrangement with immediate #8.
94define <8 x half> @h_v8_s8(<8 x i16> %u) #0 {
95; CHECK-LABEL: h_v8_s8:
96; CHECK:       // %bb.0:
97; CHECK-NEXT:    scvtf v0.8h, v0.8h, #8
98; CHECK-NEXT:    ret
99  %s = ashr exact <8 x i16> %u, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
100  %v = sitofp <8 x i16> %s to <8 x half>
101  ret <8 x half> %v
102}
103
104; int-to-fp conversion of element in lane 0 should apply
105; cvtf on vector subregister to avoid fpr->gpr trip
; Signed case: extract lane 0 (constant index) of <2 x i32>, then sitofp.
; The expected output converts in place with 'scvtf s0, s0'; no
; general-purpose register appears in the expected instructions.
106define float @l0_extract_f_v2s(<2 x i32> %u) {
107; CHECK-LABEL: l0_extract_f_v2s:
108; CHECK:       // %bb.0:
109; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
110; CHECK-NEXT:    scvtf s0, s0
111; CHECK-NEXT:    ret
112  %i = extractelement <2 x i32> %u, i64 0
113  %f = sitofp i32 %i to float
114  ret float %f
115}
116
117; cvtf to use ssub for bottom 32-bits from v2i32
; Unsigned case: extract lane 0 (constant index) of <2 x i32>, then uitofp.
; The expected output converts in place with 'ucvtf s0, s0'.
118define float @l0_extract_f_v2u(<2 x i32> %u) {
119; CHECK-LABEL: l0_extract_f_v2u:
120; CHECK:       // %bb.0:
121; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
122; CHECK-NEXT:    ucvtf s0, s0
123; CHECK-NEXT:    ret
124  %i = extractelement <2 x i32> %u, i64 0
125  %f = uitofp i32 %i to float
126  ret float %f
127}
128
129; Pattern should only apply when it is known to be lane 0
; Variable-index case: the lane number %n is a function argument, not a
; constant. The expected output stores the vector to a stack slot, inserts
; the index into the slot address with bfi, reloads the element with
; 'ldr s0', and only then converts it with 'scvtf s0, s0'.
130define float @ln_extract_f_v2s(<2 x i32> %u, i64 %n) {
131; CHECK-LABEL: ln_extract_f_v2s:
132; CHECK:       // %bb.0:
133; CHECK-NEXT:    sub sp, sp, #16
134; CHECK-NEXT:    .cfi_def_cfa_offset 16
135; CHECK-NEXT:    add x8, sp, #8
136; CHECK-NEXT:    str d0, [sp, #8]
137; CHECK-NEXT:    bfi x8, x0, #2, #1
138; CHECK-NEXT:    ldr s0, [x8]
139; CHECK-NEXT:    scvtf s0, s0
140; CHECK-NEXT:    add sp, sp, #16
141; CHECK-NEXT:    ret
142  %i = extractelement <2 x i32> %u, i64 %n
143  %f = sitofp i32 %i to float
144  ret float %f
145}
146
147; cvtf to use ssub for bottom 32-bits from v4i32
; Signed case: extract lane 0 (constant index) of <4 x i32>, then sitofp.
; The expected output is a single 'scvtf s0, s0'.
148define float @l0_extract_f_v4s(<4 x i32> %u) {
149; CHECK-LABEL: l0_extract_f_v4s:
150; CHECK:       // %bb.0:
151; CHECK-NEXT:    scvtf s0, s0
152; CHECK-NEXT:    ret
153  %i = extractelement <4 x i32> %u, i64 0
154  %f = sitofp i32 %i to float
155  ret float %f
156}
157
; Unsigned case: extract lane 0 (constant index) of <4 x i32>, then uitofp.
; The expected output is a single 'ucvtf s0, s0'.
158define float @l0_extract_f_v4u(<4 x i32> %u) {
159; CHECK-LABEL: l0_extract_f_v4u:
160; CHECK:       // %bb.0:
161; CHECK-NEXT:    ucvtf s0, s0
162; CHECK-NEXT:    ret
163  %i = extractelement <4 x i32> %u, i64 0
164  %f = uitofp i32 %i to float
165  ret float %f
166}
167
; Variable-index case on <4 x i32>: the lane number %n is a function argument.
; The expected output stores the whole q0 register to the stack, inserts the
; index into the address with bfi, reloads the element with 'ldr s0', and
; converts it with 'scvtf s0, s0'.
168define float @ln_extract_f_v4s(<4 x i32> %u, i64 %n) {
169; CHECK-LABEL: ln_extract_f_v4s:
170; CHECK:       // %bb.0:
171; CHECK-NEXT:    sub sp, sp, #16
172; CHECK-NEXT:    .cfi_def_cfa_offset 16
173; CHECK-NEXT:    mov x8, sp
174; CHECK-NEXT:    str q0, [sp]
175; CHECK-NEXT:    bfi x8, x0, #2, #2
176; CHECK-NEXT:    ldr s0, [x8]
177; CHECK-NEXT:    scvtf s0, s0
178; CHECK-NEXT:    add sp, sp, #16
179; CHECK-NEXT:    ret
180  %i = extractelement <4 x i32> %u, i64 %n
181  %f = sitofp i32 %i to float
182  ret float %f
183}
184
185; cvtf to use dsub for bottom 64-bits from v2i64
; Signed case: extract lane 0 (constant index) of <2 x i64>, then sitofp to
; double. The expected output is a single 'scvtf d0, d0'.
186define double @l0_extract_d_v2s(<2 x i64> %u) {
187; CHECK-LABEL: l0_extract_d_v2s:
188; CHECK:       // %bb.0:
189; CHECK-NEXT:    scvtf d0, d0
190; CHECK-NEXT:    ret
191  %i = extractelement <2 x i64> %u, i64 0
192  %f = sitofp i64 %i to double
193  ret double %f
194}
195
; Unsigned case: extract lane 0 (constant index) of <2 x i64>, then uitofp to
; double. The expected output is a single 'ucvtf d0, d0'.
196define double @l0_extract_d_v2u(<2 x i64> %u) {
197; CHECK-LABEL: l0_extract_d_v2u:
198; CHECK:       // %bb.0:
199; CHECK-NEXT:    ucvtf d0, d0
200; CHECK-NEXT:    ret
201  %i = extractelement <2 x i64> %u, i64 0
202  %f = uitofp i64 %i to double
203  ret double %f
204}
205
; Variable-index case on <2 x i64>: the lane number %n is a function argument.
; The expected output stores q0 to the stack, inserts the index into the
; address with bfi, reloads the element with 'ldr d0', and converts it with
; 'scvtf d0, d0'.
206define double @ln_extract_d_v2s(<2 x i64> %u, i64 %n) {
207; CHECK-LABEL: ln_extract_d_v2s:
208; CHECK:       // %bb.0:
209; CHECK-NEXT:    sub sp, sp, #16
210; CHECK-NEXT:    .cfi_def_cfa_offset 16
211; CHECK-NEXT:    mov x8, sp
212; CHECK-NEXT:    str q0, [sp]
213; CHECK-NEXT:    bfi x8, x0, #3, #1
214; CHECK-NEXT:    ldr d0, [x8]
215; CHECK-NEXT:    scvtf d0, d0
216; CHECK-NEXT:    add sp, sp, #16
217; CHECK-NEXT:    ret
218  %i = extractelement <2 x i64> %u, i64 %n
219  %f = sitofp i64 %i to double
220  ret double %f
221}
222
223; (fullfp16) cvtf to use hsub for bottom 16-bits from v8i16
; Signed case: extract lane 0 (constant index) of <8 x i16>, then sitofp to
; half. The function uses attribute group #0 (+fullfp16).
; The expected output is a single 'scvtf h0, h0'.
224define half @l0_extract_h_v8s(<8 x i16> %u) #0 {
225; CHECK-LABEL: l0_extract_h_v8s:
226; CHECK:       // %bb.0:
227; CHECK-NEXT:    scvtf h0, h0
228; CHECK-NEXT:    ret
229  %i = extractelement <8 x i16> %u, i32 0
230  %f = sitofp i16 %i to half
231  ret half %f
232}
233
; Unsigned case: extract lane 0 (constant index) of <8 x i16>, then uitofp to
; half. The function uses attribute group #0 (+fullfp16).
; The expected output is a single 'ucvtf h0, h0'.
234define half @l0_extract_h_v8u(<8 x i16> %u) #0 {
235; CHECK-LABEL: l0_extract_h_v8u:
236; CHECK:       // %bb.0:
237; CHECK-NEXT:    ucvtf h0, h0
238; CHECK-NEXT:    ret
239  %i = extractelement <8 x i16> %u, i32 0
240  %f = uitofp i16 %i to half
241  ret half %f
242}
243
; Variable-index case on <8 x i16>: the lane number %n is an i32 argument.
; The expected output stores q0 to the stack, inserts the index into the
; address with bfi, loads the element into a general-purpose register with
; 'ldrh w8', and converts from that register with 'ucvtf h0, w8'.
244define half @ln_extract_h_v8u(<8 x i16> %u, i32 %n) #0 {
245; CHECK-LABEL: ln_extract_h_v8u:
246; CHECK:       // %bb.0:
247; CHECK-NEXT:    sub sp, sp, #16
248; CHECK-NEXT:    .cfi_def_cfa_offset 16
249; CHECK-NEXT:    mov x8, sp
250; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
251; CHECK-NEXT:    str q0, [sp]
252; CHECK-NEXT:    bfi x8, x0, #1, #3
253; CHECK-NEXT:    ldrh w8, [x8]
254; CHECK-NEXT:    ucvtf h0, w8
255; CHECK-NEXT:    add sp, sp, #16
256; CHECK-NEXT:    ret
257  %i = extractelement <8 x i16> %u, i32 %n
258  %f = uitofp i16 %i to half
259  ret half %f
260}
261
; Attribute group #0 enables the fullfp16 target feature; it is attached to
; the functions in this file that produce half results.
262attributes #0 = { "target-features"="+fullfp16"}
263