; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512bf16,+avx512vl | FileCheck %s --check-prefixes=X64

@a = global bfloat 0xR0000, align 2
@b = global bfloat 0xR0000, align 2
@c = global bfloat 0xR0000, align 2

define float @bfloat_to_float() strictfp {
; X86-LABEL: bfloat_to_float:
; X86:       # %bb.0:
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    movzwl a, %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __extendbfsf2
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: bfloat_to_float:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    movq a@GOTPCREL(%rip), %rax
; X64-NEXT:    movzwl (%rax), %edi
; X64-NEXT:    callq __extendbfsf2@PLT
; X64-NEXT:    popq %rax
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %1 = load bfloat, ptr @a, align 2
  %2 = tail call float @llvm.experimental.constrained.fpext.f32.bf16(bfloat %1, metadata !"fpexcept.strict") #0
  ret float %2
}

define double @bfloat_to_double() strictfp {
; X86-LABEL: bfloat_to_double:
; X86:       # %bb.0:
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    movzwl a, %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __extendbfsf2
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: bfloat_to_double:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    movq a@GOTPCREL(%rip), %rax
; X64-NEXT:    movzwl (%rax), %edi
; X64-NEXT:    callq __extendbfsf2@PLT
; X64-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; X64-NEXT:    popq %rax
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %1 = load bfloat, ptr @a, align 2
  %2 = tail call double @llvm.experimental.constrained.fpext.f64.bf16(bfloat %1, metadata !"fpexcept.strict") #0
  ret double %2
}

define void @float_to_bfloat(float %0) strictfp {
; X86-LABEL: float_to_bfloat:
; X86:       # %bb.0:
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    fstps (%esp)
; X86-NEXT:    wait
; X86-NEXT:    calll __truncsfbf2
; X86-NEXT:    movw %ax, a
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: float_to_bfloat:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    callq __truncsfbf2@PLT
; X64-NEXT:    movq a@GOTPCREL(%rip), %rcx
; X64-NEXT:    movw %ax, (%rcx)
; X64-NEXT:    popq %rax
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %2 = tail call bfloat @llvm.experimental.constrained.fptrunc.bf16.f32(float %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store bfloat %2, ptr @a, align 2
  ret void
}

define void @double_to_bfloat(double %0) strictfp {
; X86-LABEL: double_to_bfloat:
; X86:       # %bb.0:
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-NEXT:    fstpl (%esp)
; X86-NEXT:    wait
; X86-NEXT:    calll __truncdfbf2
; X86-NEXT:    movw %ax, a
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: double_to_bfloat:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    callq __truncdfbf2@PLT
; X64-NEXT:    movq a@GOTPCREL(%rip), %rcx
; X64-NEXT:    movw %ax, (%rcx)
; X64-NEXT:    popq %rax
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %2 = tail call bfloat @llvm.experimental.constrained.fptrunc.bf16.f64(double %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store bfloat %2, ptr @a, align 2
  ret void
}

define void @add() strictfp {
; X86-LABEL: add:
; X86:       # %bb.0:
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    movzwl a, %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __extendbfsf2
; X86-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT:    wait
; X86-NEXT:    movzwl b, %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __extendbfsf2
; X86-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT:    faddp %st, %st(1)
; X86-NEXT:    fstps (%esp)
; X86-NEXT:    wait
; X86-NEXT:    calll __truncsfbf2
; X86-NEXT:    movw %ax, c
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: add:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    movq a@GOTPCREL(%rip), %rax
; X64-NEXT:    movzwl (%rax), %edi
; X64-NEXT:    callq __extendbfsf2@PLT
; X64-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT:    movq b@GOTPCREL(%rip), %rax
; X64-NEXT:    movzwl (%rax), %edi
; X64-NEXT:    callq __extendbfsf2@PLT
; X64-NEXT:    vaddss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; X64-NEXT:    callq __truncsfbf2@PLT
; X64-NEXT:    movq c@GOTPCREL(%rip), %rcx
; X64-NEXT:    movw %ax, (%rcx)
; X64-NEXT:    popq %rax
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %1 = load bfloat, ptr @a, align 2
  %2 = tail call float @llvm.experimental.constrained.fpext.f32.bf16(bfloat %1, metadata !"fpexcept.strict") #0
  %3 = load bfloat, ptr @b, align 2
  %4 = tail call float @llvm.experimental.constrained.fpext.f32.bf16(bfloat %3, metadata !"fpexcept.strict") #0
  %5 = tail call float @llvm.experimental.constrained.fadd.f32(float %2, float %4, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %6 = tail call bfloat @llvm.experimental.constrained.fptrunc.bf16.f32(float %5, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store bfloat %6, ptr @c, align 2
  ret void
}

declare float @llvm.experimental.constrained.fpext.f32.bf16(bfloat, metadata)
declare double @llvm.experimental.constrained.fpext.f64.bf16(bfloat, metadata)
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare bfloat @llvm.experimental.constrained.fptrunc.bf16.f32(float, metadata, metadata)
declare bfloat @llvm.experimental.constrained.fptrunc.bf16.f64(double, metadata, metadata)

attributes #0 = { strictfp }