xref: /llvm-project/llvm/test/CodeGen/X86/cvt16.ll (revision 67c3f2b4303972a6dc8ada54efe1d5d80d119a51)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c | FileCheck %s -check-prefix=LIBCALL
3; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c | FileCheck %s -check-prefix=F16C
4; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c,+soft-float | FileCheck %s -check-prefix=SOFTFLOAT
5; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c,+soft-float | FileCheck %s -check-prefix=SOFTFLOAT
6
7; This is a test for float to half float conversions on x86-64.
8;
9; If there is no F16C support, or if +soft-float is set, then:
10; 1) half float to float conversions are translated into calls to
11;    __extendhfsf2 (or __gnu_h2f_ieee under +soft-float), which is
12;    expected to be defined by the compiler runtime library;
13; 2) float to half float conversions are translated into calls to
14;    __truncsfhf2 (or __gnu_f2h_ieee under +soft-float), which is
15;    expected to be defined by the compiler runtime library.
16;
17; Otherwise (we have F16C support):
18; 1) half float to float conversions are translated using
19;    vcvtph2ps instructions;
20; 2) float to half float conversions are translated using
21;    vcvtps2ph instructions (double to half still calls __truncdfhf2).
22
23
; test1 - float to half conversion whose i16 result is stored through %dest.
; The IR calls llvm.convert.to.fp16.f32 and stores the result with align 2.
; Checked lowering per run configuration:
;   - no F16C:    call to __truncsfhf2, then pextrw of the result to memory;
;   - with F16C:  vcvtps2ph followed by vpextrw to memory, no call;
;   - soft-float: call to __gnu_f2h_ieee, then a 16-bit store of %ax.
; The soft-float checks preserve %rsi (not %rdi) across the call; presumably
; %src occupies the first integer argument register there - TODO confirm.
24define void @test1(float %src, ptr %dest) nounwind {
25; LIBCALL-LABEL: test1:
26; LIBCALL:       # %bb.0:
27; LIBCALL-NEXT:    pushq %rbx
28; LIBCALL-NEXT:    movq %rdi, %rbx
29; LIBCALL-NEXT:    callq __truncsfhf2@PLT
30; LIBCALL-NEXT:    pextrw $0, %xmm0, (%rbx)
31; LIBCALL-NEXT:    popq %rbx
32; LIBCALL-NEXT:    retq
33;
34; F16C-LABEL: test1:
35; F16C:       # %bb.0:
36; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
37; F16C-NEXT:    vpextrw $0, %xmm0, (%rdi)
38; F16C-NEXT:    retq
39;
40; SOFTFLOAT-LABEL: test1:
41; SOFTFLOAT:       # %bb.0:
42; SOFTFLOAT-NEXT:    pushq %rbx
43; SOFTFLOAT-NEXT:    movq %rsi, %rbx
44; SOFTFLOAT-NEXT:    callq __gnu_f2h_ieee@PLT
45; SOFTFLOAT-NEXT:    movw %ax, (%rbx)
46; SOFTFLOAT-NEXT:    popq %rbx
47; SOFTFLOAT-NEXT:    retq
48  %1 = tail call i16 @llvm.convert.to.fp16.f32(float %src)
49  store i16 %1, ptr %dest, align 2
50  ret void
51}
52
; test2 - half to float conversion of an i16 loaded from %src.
; The IR loads an i16 (align 2) and passes it to llvm.convert.from.fp16.f32.
; Checked lowering per run configuration:
;   - no F16C:    pinsrw load into %xmm0, then a tail-call jmp to __extendhfsf2;
;   - with F16C:  vpinsrw load followed by vcvtph2ps, no call;
;   - soft-float: movzwl load into %edi, then a call to __gnu_h2f_ieee.
53define float @test2(ptr nocapture %src) nounwind {
54; LIBCALL-LABEL: test2:
55; LIBCALL:       # %bb.0:
56; LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
57; LIBCALL-NEXT:    jmp __extendhfsf2@PLT # TAILCALL
58;
59; F16C-LABEL: test2:
60; F16C:       # %bb.0:
61; F16C-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
62; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
63; F16C-NEXT:    retq
64;
65; SOFTFLOAT-LABEL: test2:
66; SOFTFLOAT:       # %bb.0:
67; SOFTFLOAT-NEXT:    pushq %rax
68; SOFTFLOAT-NEXT:    movzwl (%rdi), %edi
69; SOFTFLOAT-NEXT:    callq __gnu_h2f_ieee@PLT
70; SOFTFLOAT-NEXT:    popq %rcx
71; SOFTFLOAT-NEXT:    retq
72  %1 = load i16, ptr %src, align 2
73  %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
74  ret float %2
75}
76
; test3 - float to half to float round trip, entirely in registers.
; The IR feeds the result of llvm.convert.to.fp16.f32 straight into
; llvm.convert.from.fp16.f32 and returns the float.
; Checked lowering per run configuration:
;   - no F16C:    call to __truncsfhf2, then a tail-call jmp to __extendhfsf2;
;   - with F16C:  vcvtps2ph followed by vcvtph2ps, no call;
;   - soft-float: call to __gnu_f2h_ieee, movzwl of %ax into %edi, then a call
;                 to __gnu_h2f_ieee.
; This is the only function here marked uwtable, and the only one whose checks
; contain .cfi_def_cfa_offset directives - presumably related; TODO confirm.
77define float @test3(float %src) nounwind uwtable readnone {
78; LIBCALL-LABEL: test3:
79; LIBCALL:       # %bb.0:
80; LIBCALL-NEXT:    pushq %rax
81; LIBCALL-NEXT:    .cfi_def_cfa_offset 16
82; LIBCALL-NEXT:    callq __truncsfhf2@PLT
83; LIBCALL-NEXT:    popq %rax
84; LIBCALL-NEXT:    .cfi_def_cfa_offset 8
85; LIBCALL-NEXT:    jmp __extendhfsf2@PLT # TAILCALL
86;
87; F16C-LABEL: test3:
88; F16C:       # %bb.0:
89; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
90; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
91; F16C-NEXT:    retq
92;
93; SOFTFLOAT-LABEL: test3:
94; SOFTFLOAT:       # %bb.0:
95; SOFTFLOAT-NEXT:    pushq %rax
96; SOFTFLOAT-NEXT:    .cfi_def_cfa_offset 16
97; SOFTFLOAT-NEXT:    callq __gnu_f2h_ieee@PLT
98; SOFTFLOAT-NEXT:    movzwl %ax, %edi
99; SOFTFLOAT-NEXT:    callq __gnu_h2f_ieee@PLT
100; SOFTFLOAT-NEXT:    popq %rcx
101; SOFTFLOAT-NEXT:    .cfi_def_cfa_offset 8
102; SOFTFLOAT-NEXT:    retq
103  %1 = tail call i16 @llvm.convert.to.fp16.f32(float %src)
104  %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
105  ret float %2
106}
107
; test4 - half to double conversion of an i16 loaded from %src.
; The IR loads an i16 (align 2) and passes it to llvm.convert.from.fp16.f64.
; In every configuration the checks show a half to float step followed by a
; separate float to double step:
;   - no F16C:    call to __extendhfsf2, then cvtss2sd;
;   - with F16C:  vcvtph2ps, then vcvtss2sd, no call;
;   - soft-float: call to __gnu_h2f_ieee, then a call to __extendsfdf2 with the
;                 intermediate value moved from %eax to %edi.
108define double @test4(ptr nocapture %src) nounwind {
109; LIBCALL-LABEL: test4:
110; LIBCALL:       # %bb.0:
111; LIBCALL-NEXT:    pushq %rax
112; LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
113; LIBCALL-NEXT:    callq __extendhfsf2@PLT
114; LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
115; LIBCALL-NEXT:    popq %rax
116; LIBCALL-NEXT:    retq
117;
118; F16C-LABEL: test4:
119; F16C:       # %bb.0:
120; F16C-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
121; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
122; F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
123; F16C-NEXT:    retq
124;
125; SOFTFLOAT-LABEL: test4:
126; SOFTFLOAT:       # %bb.0:
127; SOFTFLOAT-NEXT:    pushq %rax
128; SOFTFLOAT-NEXT:    movzwl (%rdi), %edi
129; SOFTFLOAT-NEXT:    callq __gnu_h2f_ieee@PLT
130; SOFTFLOAT-NEXT:    movl %eax, %edi
131; SOFTFLOAT-NEXT:    callq __extendsfdf2@PLT
132; SOFTFLOAT-NEXT:    popq %rcx
133; SOFTFLOAT-NEXT:    retq
134  %1 = load i16, ptr %src, align 2
135  %2 = tail call double @llvm.convert.from.fp16.f64(i16 %1)
136  ret double %2
137}
138
; test5 - double to half conversion returned as an i16.
; The IR calls llvm.convert.to.fp16.f64 and returns its result.
; All three check prefixes expect a call to __truncdfhf2, including the run
; with F16C enabled (no vcvtps2ph is checked for here).
; Without soft-float the result is then extracted from %xmm0 into %eax
; (pextrw / vpextrw); under soft-float no extraction follows the call.
139define i16 @test5(double %src) nounwind {
140; LIBCALL-LABEL: test5:
141; LIBCALL:       # %bb.0:
142; LIBCALL-NEXT:    pushq %rax
143; LIBCALL-NEXT:    callq __truncdfhf2@PLT
144; LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
145; LIBCALL-NEXT:    # kill: def $ax killed $ax killed $eax
146; LIBCALL-NEXT:    popq %rcx
147; LIBCALL-NEXT:    retq
148;
149; F16C-LABEL: test5:
150; F16C:       # %bb.0:
151; F16C-NEXT:    pushq %rax
152; F16C-NEXT:    callq __truncdfhf2@PLT
153; F16C-NEXT:    vpextrw $0, %xmm0, %eax
154; F16C-NEXT:    # kill: def $ax killed $ax killed $eax
155; F16C-NEXT:    popq %rcx
156; F16C-NEXT:    retq
157;
158; SOFTFLOAT-LABEL: test5:
159; SOFTFLOAT:       # %bb.0:
160; SOFTFLOAT-NEXT:    pushq %rax
161; SOFTFLOAT-NEXT:    callq __truncdfhf2@PLT
162; SOFTFLOAT-NEXT:    popq %rcx
163; SOFTFLOAT-NEXT:    retq
164  %val = tail call i16 @llvm.convert.to.fp16.f64(double %src)
165  ret i16 %val
166}
167
; Declarations of the half-precision conversion intrinsics called by the test
; functions in this file: i16 to float/double (from.fp16) and float/double to
; i16 (to.fp16).
168declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
169declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
170declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
171declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone
172