; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2  -O3 | FileCheck %s --check-prefixes=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c  -O3 | FileCheck %s --check-prefixes=AVX,F16C
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f  -O3 | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64

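; Verify lowering of strict (constrained) sitofp/uitofp from i1/i8/i16/i32/i64
; to half. Without native FP16 support the value is converted to f32 and then
; truncated to half, via the __truncsfhf2 libcall (SSE2) or vcvtps2ph
; (F16C/AVX512F); the upper lanes are zeroed before the packed vcvtps2ph,
; presumably so stray data cannot raise spurious exceptions under strictfp.
; AVX512FP16 converts directly with vcvtsi2sh/vcvtusi2sh.
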
declare half @llvm.experimental.constrained.sitofp.f16.i1(i1, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i8(i8, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i16(i16, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i1(i1, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i8(i8, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i16(i16, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata)

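; i1 is sign-extended to 0/-1 (andb $1; negb; movsbl) before the scalar
; convert, since the scalar int-to-float instructions only take 32/64-bit
; GPRs.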
define half @sitofp_i1tof16(i1 %x) #0 {
; SSE2-LABEL: sitofp_i1tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    andb $1, %dil
; SSE2-NEXT:    negb %dil
; SSE2-NEXT:    movsbl %dil, %eax
; SSE2-NEXT:    cvtsi2ss %eax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sitofp_i1tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    andb $1, %dil
; AVX-NEXT:    negb %dil
; AVX-NEXT:    movsbl %dil, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: sitofp_i1tof16:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andb $1, %al
; X86-NEXT:    negb %al
; X86-NEXT:    movsbl %al, %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_i1tof16:
; X64:       # %bb.0:
; X64-NEXT:    andb $1, %dil
; X64-NEXT:    negb %dil
; X64-NEXT:    movsbl %dil, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i1(i1 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

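; i8 is sign-extended to i32 with movsbl for the same reason.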
define half @sitofp_i8tof16(i8 %x) #0 {
; SSE2-LABEL: sitofp_i8tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movsbl %dil, %eax
; SSE2-NEXT:    cvtsi2ss %eax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sitofp_i8tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    movsbl %dil, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: sitofp_i8tof16:
; X86:       # %bb.0:
; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_i8tof16:
; X64:       # %bb.0:
; X64-NEXT:    movsbl %dil, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i8(i8 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

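; i16 is sign-extended to i32 with movswl.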
define half @sitofp_i16tof16(i16 %x) #0 {
; SSE2-LABEL: sitofp_i16tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movswl %di, %eax
; SSE2-NEXT:    cvtsi2ss %eax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sitofp_i16tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    movswl %di, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: sitofp_i16tof16:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_i16tof16:
; X64:       # %bb.0:
; X64-NEXT:    movswl %di, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

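; i32 feeds the scalar convert directly; the 32-bit AVX512FP16 run folds the
; stack argument load into the convert (vcvtsi2shl with a memory operand).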
define half @sitofp_i32tof16(i32 %x) #0 {
; SSE2-LABEL: sitofp_i32tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    cvtsi2ss %edi, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sitofp_i32tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: sitofp_i32tof16:
; X86:       # %bb.0:
; X86-NEXT:    vcvtsi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_i32tof16:
; X64:       # %bb.0:
; X64-NEXT:    vcvtsi2sh %edi, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

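; i64 converts directly on 64-bit targets; 32-bit mode has no 64-bit GPR, so
; the value is loaded into an XMM register and converted with the packed
; vcvtqq2ph.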
define half @sitofp_i64tof16(i64 %x) #0 {
; SSE2-LABEL: sitofp_i64tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    cvtsi2ss %rdi, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sitofp_i64tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: sitofp_i64tof16:
; X86:       # %bb.0:
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vcvtqq2ph %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_i64tof16:
; X64:       # %bb.0:
; X64-NEXT:    vcvtsi2sh %rdi, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

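; After masking with 1 the i1 value is known non-negative, so the signed
; convert is also correct for the unsigned case.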
define half @uitofp_i1tof16(i1 %x) #0 {
; SSE2-LABEL: uitofp_i1tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    andl $1, %edi
; SSE2-NEXT:    cvtsi2ss %edi, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: uitofp_i1tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    andl $1, %edi
; AVX-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: uitofp_i1tof16:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andb $1, %al
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_i1tof16:
; X64:       # %bb.0:
; X64-NEXT:    andl $1, %edi
; X64-NEXT:    vcvtsi2sh %edi, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i1(i1 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

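; Unsigned i8 zero-extends with movzbl; the signed i32 convert is then exact.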
define half @uitofp_i8tof16(i8 %x) #0 {
; SSE2-LABEL: uitofp_i8tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movzbl %dil, %eax
; SSE2-NEXT:    cvtsi2ss %eax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: uitofp_i8tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    movzbl %dil, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: uitofp_i8tof16:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_i8tof16:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i8(i8 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

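; Unsigned i16 zero-extends with movzwl, as with i8 above.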
define half @uitofp_i16tof16(i16 %x) #0 {
; SSE2-LABEL: uitofp_i16tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movzwl %di, %eax
; SSE2-NEXT:    cvtsi2ss %eax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: uitofp_i16tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    movzwl %di, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: uitofp_i16tof16:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_i16tof16:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

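; Without an unsigned convert, unsigned i32 is zero-extended to i64 (movl
; implicitly clears the upper bits) and run through the signed 64-bit
; convert; AVX512F and AVX512FP16 use vcvtusi2ss/vcvtusi2sh instead.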
define half @uitofp_i32tof16(i32 %x) #0 {
; SSE2-LABEL: uitofp_i32tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movl %edi, %eax
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; F16C-LABEL: uitofp_i32tof16:
; F16C:       # %bb.0:
; F16C-NEXT:    movl %edi, %eax
; F16C-NEXT:    vcvtsi2ss %rax, %xmm0, %xmm0
; F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT:    vmovd %xmm0, %eax
; F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; F16C-NEXT:    retq
;
; AVX512-LABEL: uitofp_i32tof16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtusi2ss %edi, %xmm0, %xmm0
; AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; X86-LABEL: uitofp_i32tof16:
; X86:       # %bb.0:
; X86-NEXT:    vcvtusi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_i32tof16:
; X64:       # %bb.0:
; X64-NEXT:    vcvtusi2sh %edi, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

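; With only a signed i64 convert (SSE2/F16C), an input with the sign bit set
; is shifted right by one with the lost low bit ORed back in to preserve
; rounding, converted, and then doubled; the testq/cmovnsq and the branch
; select between that path and the direct convert. AVX512F/AVX512FP16 use
; native unsigned converts.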
define half @uitofp_i64tof16(i64 %x) #0 {
; SSE2-LABEL: uitofp_i64tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq %rdi, %rax
; SSE2-NEXT:    shrq %rax
; SSE2-NEXT:    movl %edi, %ecx
; SSE2-NEXT:    andl $1, %ecx
; SSE2-NEXT:    orq %rax, %rcx
; SSE2-NEXT:    testq %rdi, %rdi
; SSE2-NEXT:    cmovnsq %rdi, %rcx
; SSE2-NEXT:    cvtsi2ss %rcx, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    addss %xmm1, %xmm0
; SSE2-NEXT:    js .LBB9_2
; SSE2-NEXT:  # %bb.1:
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:  .LBB9_2:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; F16C-LABEL: uitofp_i64tof16:
; F16C:       # %bb.0:
; F16C-NEXT:    movq %rdi, %rax
; F16C-NEXT:    shrq %rax
; F16C-NEXT:    movl %edi, %ecx
; F16C-NEXT:    andl $1, %ecx
; F16C-NEXT:    orq %rax, %rcx
; F16C-NEXT:    testq %rdi, %rdi
; F16C-NEXT:    cmovnsq %rdi, %rcx
; F16C-NEXT:    vcvtsi2ss %rcx, %xmm0, %xmm0
; F16C-NEXT:    jns .LBB9_2
; F16C-NEXT:  # %bb.1:
; F16C-NEXT:    vaddss %xmm0, %xmm0, %xmm0
; F16C-NEXT:  .LBB9_2:
; F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT:    vmovd %xmm0, %eax
; F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; F16C-NEXT:    retq
;
; AVX512-LABEL: uitofp_i64tof16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtusi2ss %rdi, %xmm0, %xmm0
; AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; X86-LABEL: uitofp_i64tof16:
; X86:       # %bb.0:
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vcvtuqq2ph %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_i64tof16:
; X64:       # %bb.0:
; X64-NEXT:    vcvtusi2sh %rdi, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

attributes #0 = { strictfp nounwind }