xref: /llvm-project/llvm/test/CodeGen/X86/select-narrow-int-to-fp.ll (revision cda2b01df708cc4b5448fa1bdb63ca5e15251545)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -o - %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK-NO_FP16
; RUN: llc -o - %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 -mattr=+avx512fp16 | FileCheck %s --check-prefixes=CHECK-WITH_FP16

; Note: We could check more configurations, but anything with software
; emulation of fp16 generates a ton of assembly code and is not particularly
; interesting.

;----------------------------------------
; i8 input
;----------------------------------------
12
; uint8_t to float (scalar).
; Expected lowering, identical for both check prefixes:
; - Go from i8 to i32: zext (movzbl)
; - Convert i32 to float (vcvtsi2ss)
define float @uint8ToFloat(i8 %int8) {
; CHECK-NO_FP16-LABEL: uint8ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movzbl %dil, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: uint8ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movzbl %dil, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
    %fp32 = uitofp i8 %int8 to float
    ret float %fp32
}
31
; vector uint8_t to float.
; Same as @uint8ToFloat but with vector types: the 16 bytes are zero-extended
; to 16 x i32 (vpmovzxbd) and converted in one step (vcvtdq2ps). The expected
; code is identical for both check prefixes.
define <16 x float> @vector_uint8ToFloat(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_uint8ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_uint8ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-WITH_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    retq
    %fp32 = uitofp <16 x i8> %int8 to <16 x float>
    ret <16 x float> %fp32
}
49
50
; uint8_t to half (scalar).
;
; If no half support (CHECK-NO_FP16):
; - Go from i8 to i32: zext
; - Convert i32 to float
; - Trunc from float to half (vcvtps2ph); the vmovd/vpinsrw pair then places
;   the 16-bit result in the low word of xmm0
;
; Else if half support (CHECK-WITH_FP16):
; - Go from i8 to i32: zext
; - Convert i32 to half (vcvtsi2sh)
define half @uint8ToHalf(i8 %int8) {
; CHECK-NO_FP16-LABEL: uint8ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movzbl %dil, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vmovd %xmm0, %eax
; CHECK-NO_FP16-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: uint8ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movzbl %dil, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
    %fp16 = uitofp i8 %int8 to half
    ret half %fp16
}
79
; vector uint8_t to half.
;
; If no half support (CHECK-NO_FP16):
; - Go from i8 to i32: zext (vpmovzxbd)
; - Convert i32 to float (vcvtdq2ps)
; - Trunc from float to half (vcvtps2ph)
;
; Else if half support (CHECK-WITH_FP16):
; - Go from i8 to i16: zext (vpmovzxbw)
; - Convert i16 to half (the checks expect the signed form, vcvtw2ph)
;
; The difference with the scalar version (uint8ToHalf) is that we use i16
; for the intermediate type when we have half support.
define <16 x half> @vector_uint8ToHalf(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_uint8ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_uint8ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; CHECK-WITH_FP16-NEXT:    vcvtw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT:    retq
    %fp16 = uitofp <16 x i8> %int8 to <16 x half>
    ret <16 x half> %fp16
}
109
; int8_t to float (scalar).
; Same as the uint8_t tests but with the signed variant,
; i.e., use sext (movsbl) instead of zext before converting i32 to float.
; The expected code is identical for both check prefixes.
define float @sint8ToFloat(i8 %int8) {
; CHECK-NO_FP16-LABEL: sint8ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movsbl %dil, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: sint8ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movsbl %dil, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
    %fp32 = sitofp i8 %int8 to float
    ret float %fp32
}
127
; vector int8_t to float.
; Sign-extend 16 x i8 to 16 x i32 (vpmovsxbd), then convert to float
; (vcvtdq2ps). The expected code is identical for both check prefixes.
define <16 x float> @vector_sint8ToFloat(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_sint8ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovsxbd %xmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_sint8ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovsxbd %xmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    retq
    %fp32 = sitofp <16 x i8> %int8 to <16 x float>
    ret <16 x float> %fp32
}
143
; int8_t to half (scalar).
;
; If no half support (CHECK-NO_FP16):
; - Go from i8 to i32: sext (movsbl)
; - Convert i32 to float (vcvtsi2ss)
; - Trunc from float to half (vcvtps2ph); the vmovd/vpinsrw pair then places
;   the 16-bit result in the low word of xmm0
;
; Else if half support (CHECK-WITH_FP16):
; - Go from i8 to i32: sext (movsbl)
; - Convert i32 to half (vcvtsi2sh)
define half @sint8ToHalf(i8 %int8) {
; CHECK-NO_FP16-LABEL: sint8ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movsbl %dil, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vmovd %xmm0, %eax
; CHECK-NO_FP16-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: sint8ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movsbl %dil, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
    %fp16 = sitofp i8 %int8 to half
    ret half %fp16
}
162
; vector int8_t to half.
;
; If no half support (CHECK-NO_FP16):
; - Go from i8 to i32: sext (vpmovsxbd)
; - Convert i32 to float (vcvtdq2ps)
; - Trunc from float to half (vcvtps2ph)
;
; Else if half support (CHECK-WITH_FP16):
; - Go from i8 to i16: sext (vpmovsxbw)
; - Convert i16 to half (vcvtw2ph)
define <16 x half> @vector_sint8ToHalf(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_sint8ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovsxbd %xmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_sint8ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovsxbw %xmm0, %ymm0
; CHECK-WITH_FP16-NEXT:    vcvtw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT:    retq
    %fp16 = sitofp <16 x i8> %int8 to <16 x half>
    ret <16 x half> %fp16
}
179
180
;----------------------------------------
; i16 input
;----------------------------------------

; Similar lowering to the i8 tests, but with i16 as the input type.
186
; uint16_t to float (scalar).
; - Go from i16 to i32: zext (movzwl)
; - Convert i32 to float (vcvtsi2ss)
; The expected code is identical for both check prefixes.
define float @uint16ToFloat(i16 %int16) {
; CHECK-NO_FP16-LABEL: uint16ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movzwl %di, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: uint16ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movzwl %di, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
    %fp32 = uitofp i16 %int16 to float
    ret float %fp32
}
202
; vector uint16_t to float.
; Zero-extend 16 x i16 to 16 x i32 (vpmovzxwd), then convert to float
; (vcvtdq2ps). The expected code is identical for both check prefixes.
define <16 x float> @vector_uint16ToFloat(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_uint16ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_uint16ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-WITH_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    retq
    %fp32 = uitofp <16 x i16> %int16 to <16 x float>
    ret <16 x float> %fp32
}
218
; uint16_t to half (scalar).
;
; If no half support (CHECK-NO_FP16):
; - Go from i16 to i32: zext (movzwl)
; - Convert i32 to float (vcvtsi2ss)
; - Trunc from float to half (vcvtps2ph); the vmovd/vpinsrw pair then places
;   the 16-bit result in the low word of xmm0
;
; Else if half support (CHECK-WITH_FP16):
; - Go from i16 to i32: zext (movzwl)
; - Convert i32 to half (vcvtsi2sh)
define half @uint16ToHalf(i16 %int16) {
; CHECK-NO_FP16-LABEL: uint16ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movzwl %di, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vmovd %xmm0, %eax
; CHECK-NO_FP16-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: uint16ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movzwl %di, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
    %fp16 = uitofp i16 %int16 to half
    ret half %fp16
}
237
; vector uint16_t to half.
;
; If no half support (CHECK-NO_FP16):
; - Go from i16 to i32: zext (vpmovzxwd)
; - Convert i32 to float (vcvtdq2ps)
; - Trunc from float to half (vcvtps2ph)
;
; Else if half support (CHECK-WITH_FP16):
; - Convert i16 to half directly with the unsigned form (vcvtuw2ph);
;   no extension step is expected.
define <16 x half> @vector_uint16ToHalf(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_uint16ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_uint16ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vcvtuw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT:    retq
    %fp16 = uitofp <16 x i16> %int16 to <16 x half>
    ret <16 x half> %fp16
}
253
; int16_t to float (scalar).
; - Go from i16 to i32: sext (movswl)
; - Convert i32 to float (vcvtsi2ss)
; The expected code is identical for both check prefixes.
define float @sint16ToFloat(i16 %int16) {
; CHECK-NO_FP16-LABEL: sint16ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movswl %di, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: sint16ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movswl %di, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
    %fp32 = sitofp i16 %int16 to float
    ret float %fp32
}
269
; vector int16_t to float.
; Sign-extend 16 x i16 to 16 x i32 (vpmovsxwd), then convert to float
; (vcvtdq2ps). The expected code is identical for both check prefixes.
define <16 x float> @vector_sint16ToFloat(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_sint16ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovsxwd %ymm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_sint16ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovsxwd %ymm0, %zmm0
; CHECK-WITH_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    retq
    %fp32 = sitofp <16 x i16> %int16 to <16 x float>
    ret <16 x float> %fp32
}
285
; int16_t to half (scalar).
;
; If no half support (CHECK-NO_FP16):
; - Go from i16 to i32: sext (movswl)
; - Convert i32 to float (vcvtsi2ss)
; - Trunc from float to half (vcvtps2ph); the vmovd/vpinsrw pair then places
;   the 16-bit result in the low word of xmm0
;
; Else if half support (CHECK-WITH_FP16):
; - Go from i16 to i32: sext (movswl)
; - Convert i32 to half (vcvtsi2sh)
define half @sint16ToHalf(i16 %int16) {
; CHECK-NO_FP16-LABEL: sint16ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movswl %di, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vmovd %xmm0, %eax
; CHECK-NO_FP16-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: sint16ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movswl %di, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
    %fp16 = sitofp i16 %int16 to half
    ret half %fp16
}
304
; vector int16_t to half.
;
; If no half support (CHECK-NO_FP16):
; - Go from i16 to i32: sext (vpmovsxwd)
; - Convert i32 to float (vcvtdq2ps)
; - Trunc from float to half (vcvtps2ph)
;
; Else if half support (CHECK-WITH_FP16):
; - Convert i16 to half directly (vcvtw2ph); no extension step is expected.
define <16 x half> @vector_sint16ToHalf(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_sint16ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovsxwd %ymm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_sint16ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vcvtw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT:    retq
    %fp16 = sitofp <16 x i16> %int16 to <16 x half>
    ret <16 x half> %fp16
}
320