xref: /llvm-project/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll (revision b279f6b098d3849f7f1c1f539b108307d5f8ae2d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | FileCheck --check-prefixes=CHECK %s
3; RUN: %if ptxas-11.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | %ptxas-verify -arch=sm_80 %}
4
; Data layout string for this module.
5target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
6
; Declarations of the sin and cos intrinsics called by the test functions in
; this file. Both take a <2 x bfloat> operand and return <2 x bfloat>.
; NOTE(review): the ".f16" name suffix does not match the <2 x bfloat>
; signature (the usual overload suffix for that type would be ".v2bf16").
; Presumably the IR parser remangles the name when the file is loaded --
; confirm before relying on the spelling used here.
7declare <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a) #0
8declare <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a) #0
9
; test_sin: applies the sin intrinsic to a <2 x bfloat> argument and returns
; the <2 x bfloat> result.
;
; The FileCheck assertions below pin the expected PTX: the packed argument is
; loaded as one 32-bit value and split into two 16-bit registers; each half is
; converted to f32 and passed through sin.approx.f32; both f32 results are
; then packed by a single cvt.rn.bf16x2.f32 (the result computed from %rs2 is
; listed as the first source operand) and stored as the 32-bit return value.
10define <2 x bfloat> @test_sin(<2 x bfloat> %a) #0 #1 {
11; CHECK-LABEL: test_sin(
12; CHECK:       {
13; CHECK-NEXT:    .reg .b16 %rs<3>;
14; CHECK-NEXT:    .reg .b32 %r<3>;
15; CHECK-NEXT:    .reg .f32 %f<5>;
16; CHECK-EMPTY:
17; CHECK-NEXT:  // %bb.0:
18; CHECK-NEXT:    ld.param.b32 %r1, [test_sin_param_0];
19; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
20; CHECK-NEXT:    cvt.f32.bf16 %f1, %rs1;
21; CHECK-NEXT:    sin.approx.f32 %f2, %f1;
22; CHECK-NEXT:    cvt.f32.bf16 %f3, %rs2;
23; CHECK-NEXT:    sin.approx.f32 %f4, %f3;
24; CHECK-NEXT:    cvt.rn.bf16x2.f32 %r2, %f4, %f2;
25; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
26; CHECK-NEXT:    ret;
; IR under test: a single vector intrinsic call whose result is returned
; unchanged.
27  %r = call <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a)
28  ret <2 x bfloat> %r
29}
30
; test_cos: applies the cos intrinsic to a <2 x bfloat> argument and returns
; the <2 x bfloat> result.
;
; The FileCheck assertions below pin the expected PTX: the packed argument is
; loaded as one 32-bit value and split into two 16-bit registers; each half is
; converted to f32 and passed through cos.approx.f32; both f32 results are
; then packed by a single cvt.rn.bf16x2.f32 (the result computed from %rs2 is
; listed as the first source operand) and stored as the 32-bit return value.
31define <2 x bfloat> @test_cos(<2 x bfloat> %a) #0 #1 {
32; CHECK-LABEL: test_cos(
33; CHECK:       {
34; CHECK-NEXT:    .reg .b16 %rs<3>;
35; CHECK-NEXT:    .reg .b32 %r<3>;
36; CHECK-NEXT:    .reg .f32 %f<5>;
37; CHECK-EMPTY:
38; CHECK-NEXT:  // %bb.0:
39; CHECK-NEXT:    ld.param.b32 %r1, [test_cos_param_0];
40; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
41; CHECK-NEXT:    cvt.f32.bf16 %f1, %rs1;
42; CHECK-NEXT:    cos.approx.f32 %f2, %f1;
43; CHECK-NEXT:    cvt.f32.bf16 %f3, %rs2;
44; CHECK-NEXT:    cos.approx.f32 %f4, %f3;
45; CHECK-NEXT:    cvt.rn.bf16x2.f32 %r2, %f4, %f2;
46; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
47; CHECK-NEXT:    ret;
; IR under test: a single vector intrinsic call whose result is returned
; unchanged.
48  %r = call <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a)
49  ret <2 x bfloat> %r
50}
51
52