; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512bf16,+avx512vl | FileCheck %s --check-prefixes=X64

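; Check strict-FP (constrained intrinsic) lowering of bfloat conversions and
; arithmetic. Under strictfp, the bfloat <-> float/double conversions are
; lowered to the runtime library calls __extendbfsf2, __truncsfbf2 and
; __truncdfbf2 on both the i686 and x86_64 targets.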
@a = global bfloat 0xR0000, align 2
@b = global bfloat 0xR0000, align 2
@c = global bfloat 0xR0000, align 2

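; bfloat -> float: the strict fpext is lowered to a libcall to __extendbfsf2
; on both targets.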
define float @bfloat_to_float() strictfp {
; X86-LABEL: bfloat_to_float:
; X86:       # %bb.0:
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    movzwl a, %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __extendbfsf2
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: bfloat_to_float:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    movq a@GOTPCREL(%rip), %rax
; X64-NEXT:    movzwl (%rax), %edi
; X64-NEXT:    callq __extendbfsf2@PLT
; X64-NEXT:    popq %rax
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %1 = load bfloat, ptr @a, align 2
  %2 = tail call float @llvm.experimental.constrained.fpext.f32.bf16(bfloat %1, metadata !"fpexcept.strict") #0
  ret float %2
}

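; bfloat -> double: extended to float via __extendbfsf2 first; on X64 the
; result is then widened with vcvtss2sd, while on X86 no extra instruction
; is needed for the x87 return value.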
define double @bfloat_to_double() strictfp {
; X86-LABEL: bfloat_to_double:
; X86:       # %bb.0:
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    movzwl a, %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __extendbfsf2
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: bfloat_to_double:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    movq a@GOTPCREL(%rip), %rax
; X64-NEXT:    movzwl (%rax), %edi
; X64-NEXT:    callq __extendbfsf2@PLT
; X64-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; X64-NEXT:    popq %rax
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %1 = load bfloat, ptr @a, align 2
  %2 = tail call double @llvm.experimental.constrained.fpext.f64.bf16(bfloat %1, metadata !"fpexcept.strict") #0
  ret double %2
}

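; float -> bfloat: the strict fptrunc is lowered to a call to __truncsfbf2 and
; the 16-bit result is stored back to @a.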
define void @float_to_bfloat(float %0) strictfp {
; X86-LABEL: float_to_bfloat:
; X86:       # %bb.0:
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    fstps (%esp)
; X86-NEXT:    wait
; X86-NEXT:    calll __truncsfbf2
; X86-NEXT:    movw %ax, a
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: float_to_bfloat:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    callq __truncsfbf2@PLT
; X64-NEXT:    movq a@GOTPCREL(%rip), %rcx
; X64-NEXT:    movw %ax, (%rcx)
; X64-NEXT:    popq %rax
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %2 = tail call bfloat @llvm.experimental.constrained.fptrunc.bf16.f32(float %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store bfloat %2, ptr @a, align 2
  ret void
}

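; double -> bfloat: the strict fptrunc is lowered to a single call to
; __truncdfbf2 on both targets.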
define void @double_to_bfloat(double %0) strictfp {
; X86-LABEL: double_to_bfloat:
; X86:       # %bb.0:
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-NEXT:    fstpl (%esp)
; X86-NEXT:    wait
; X86-NEXT:    calll __truncdfbf2
; X86-NEXT:    movw %ax, a
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: double_to_bfloat:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    callq __truncdfbf2@PLT
; X64-NEXT:    movq a@GOTPCREL(%rip), %rcx
; X64-NEXT:    movw %ax, (%rcx)
; X64-NEXT:    popq %rax
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %2 = tail call bfloat @llvm.experimental.constrained.fptrunc.bf16.f64(double %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store bfloat %2, ptr @a, align 2
  ret void
}

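; Strict bfloat addition: both operands are extended with __extendbfsf2, added
; as float, then truncated back with __truncsfbf2 and stored to @c.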
define void @add() strictfp {
; X86-LABEL: add:
; X86:       # %bb.0:
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    movzwl a, %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __extendbfsf2
; X86-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT:    wait
; X86-NEXT:    movzwl b, %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __extendbfsf2
; X86-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT:    faddp %st, %st(1)
; X86-NEXT:    fstps (%esp)
; X86-NEXT:    wait
; X86-NEXT:    calll __truncsfbf2
; X86-NEXT:    movw %ax, c
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: add:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    movq a@GOTPCREL(%rip), %rax
; X64-NEXT:    movzwl (%rax), %edi
; X64-NEXT:    callq __extendbfsf2@PLT
; X64-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT:    movq b@GOTPCREL(%rip), %rax
; X64-NEXT:    movzwl (%rax), %edi
; X64-NEXT:    callq __extendbfsf2@PLT
; X64-NEXT:    vaddss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; X64-NEXT:    callq __truncsfbf2@PLT
; X64-NEXT:    movq c@GOTPCREL(%rip), %rcx
; X64-NEXT:    movw %ax, (%rcx)
; X64-NEXT:    popq %rax
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %1 = load bfloat, ptr @a, align 2
  %2 = tail call float @llvm.experimental.constrained.fpext.f32.bf16(bfloat %1, metadata !"fpexcept.strict") #0
  %3 = load bfloat, ptr @b, align 2
  %4 = tail call float @llvm.experimental.constrained.fpext.f32.bf16(bfloat %3, metadata !"fpexcept.strict") #0
  %5 = tail call float @llvm.experimental.constrained.fadd.f32(float %2, float %4, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %6 = tail call bfloat @llvm.experimental.constrained.fptrunc.bf16.f32(float %5, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store bfloat %6, ptr @c, align 2
  ret void
}

declare float @llvm.experimental.constrained.fpext.f32.bf16(bfloat, metadata)
declare double @llvm.experimental.constrained.fpext.f64.bf16(bfloat, metadata)
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare bfloat @llvm.experimental.constrained.fptrunc.bf16.f32(float, metadata, metadata)
declare bfloat @llvm.experimental.constrained.fptrunc.bf16.f64(double, metadata, metadata)

attributes #0 = { strictfp }