; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | FileCheck --check-prefixes=CHECK %s
; RUN: %if ptxas-11.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | %ptxas-verify -arch=sm_80 %}

; Lowering of sin/cos on <2 x bfloat> for nvptx64 (sm_80, PTX 7.1) when llc is
; run with --enable-unsafe-fp-math. The expected PTX for both functions has the
; same shape: the packed 32-bit parameter is split into two 16-bit halves, each
; half is converted to f32, the approximate f32 instruction is applied, and the
; two f32 results are packed back into one 32-bit register by a single
; cvt.rn.bf16x2.f32.
;
; The assertions inside the function bodies are autogenerated; regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

; NOTE(review): the intrinsic names carry an `.f16` suffix although the operand
; and result type is <2 x bfloat>. Presumably the IR reader remangles overloaded
; intrinsic names to match the declared type -- confirm before renaming.
declare <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a) #0
declare <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a) #0

; Element-wise sine: expects one sin.approx.f32 per bf16 lane.
define <2 x bfloat> @test_sin(<2 x bfloat> %a) #0 #1 {
; CHECK-LABEL: test_sin(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<3>;
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .f32 %f<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [test_sin_param_0];
; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
; CHECK-NEXT:    cvt.f32.bf16 %f1, %rs1;
; CHECK-NEXT:    sin.approx.f32 %f2, %f1;
; CHECK-NEXT:    cvt.f32.bf16 %f3, %rs2;
; CHECK-NEXT:    sin.approx.f32 %f4, %f3;
; CHECK-NEXT:    cvt.rn.bf16x2.f32 %r2, %f4, %f2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %r = call <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a)
  ret <2 x bfloat> %r
}

; Element-wise cosine: expects one cos.approx.f32 per bf16 lane.
define <2 x bfloat> @test_cos(<2 x bfloat> %a) #0 #1 {
; CHECK-LABEL: test_cos(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<3>;
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .f32 %f<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [test_cos_param_0];
; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
; CHECK-NEXT:    cvt.f32.bf16 %f1, %rs1;
; CHECK-NEXT:    cos.approx.f32 %f2, %f1;
; CHECK-NEXT:    cvt.f32.bf16 %f3, %rs2;
; CHECK-NEXT:    cos.approx.f32 %f4, %f3;
; CHECK-NEXT:    cvt.rn.bf16x2.f32 %r2, %f4, %f2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %r = call <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a)
  ret <2 x bfloat> %r
}