; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mcpu=sm_50 -mattr=+ptx32 | FileCheck --check-prefixes=CHECK %s
; RUN: llc < %s -mcpu=sm_75 -mattr=+ptx70 | FileCheck --check-prefixes=CHECK-FP16 %s
; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx78 | FileCheck --check-prefixes=CHECK-BF16 %s
; RUN: %if ptxas-12.0 %{ llc < %s -mcpu=sm_50 -mattr=+ptx32 | %ptxas-verify -arch=sm_50 %}
; RUN: %if ptxas-12.0 %{ llc < %s -mcpu=sm_75 -mattr=+ptx70 | %ptxas-verify -arch=sm_75 %}
; RUN: %if ptxas-12.0 %{ llc < %s -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}
target triple = "nvptx64-nvidia-cuda"

; Lowering of the llvm.exp2 intrinsic to the PTX ex2.approx instruction for
; scalar and <2 x ...> f32, f16 and bf16 operands. What each llc configuration
; above is expected to emit, as pinned by the assertions below:
;   * sm_50 / ptx32: f32 uses ex2.approx.f32; f16 and bf16 are converted to
;     f32, evaluated with ex2.approx.f32 and converted back.
;   * sm_75 / ptx70: f16 uses ex2.approx.f16 / ex2.approx.f16x2 directly;
;     bf16 still goes through the f32 expansion.
;   * sm_90 / ptx78: bf16 uses ex2.approx.ftz.bf16 / ex2.approx.ftz.bf16x2
;     directly; f16 is lowered as on sm_75.
; Functions carrying attribute group #0 (defined at the end of the file) are
; the flush-to-zero variants: their f32 operations are expected to carry .ftz.

; --- f32 ---

; CHECK-LABEL: exp2_test
define float @exp2_test(float %in) {
; CHECK-LABEL: exp2_test(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    ld.param.f32 %f1, [exp2_test_param_0];
; CHECK-NEXT:    ex2.approx.f32 %f2, %f1;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
;
; CHECK-FP16-LABEL: exp2_test(
; CHECK-FP16:       {
; CHECK-FP16-NEXT:    .reg .f32 %f<3>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT:  // %bb.0: // %entry
; CHECK-FP16-NEXT:    ld.param.f32 %f1, [exp2_test_param_0];
; CHECK-FP16-NEXT:    ex2.approx.f32 %f2, %f1;
; CHECK-FP16-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-FP16-NEXT:    ret;
;
; CHECK-BF16-LABEL: exp2_test(
; CHECK-BF16:       {
; CHECK-BF16-NEXT:    .reg .f32 %f<3>;
; CHECK-BF16-EMPTY:
; CHECK-BF16-NEXT:  // %bb.0: // %entry
; CHECK-BF16-NEXT:    ld.param.f32 %f1, [exp2_test_param_0];
; CHECK-BF16-NEXT:    ex2.approx.f32 %f2, %f1;
; CHECK-BF16-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-BF16-NEXT:    ret;
entry:
  %exp2 = call float @llvm.exp2.f32(float %in)
  ret float %exp2
}

; CHECK-LABEL: exp2_ftz_test
define float @exp2_ftz_test(float %in) #0 {
; CHECK-LABEL: exp2_ftz_test(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    ld.param.f32 %f1, [exp2_ftz_test_param_0];
; CHECK-NEXT:    ex2.approx.ftz.f32 %f2, %f1;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
;
; CHECK-FP16-LABEL: exp2_ftz_test(
; CHECK-FP16:       {
; CHECK-FP16-NEXT:    .reg .f32 %f<3>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT:  // %bb.0: // %entry
; CHECK-FP16-NEXT:    ld.param.f32 %f1, [exp2_ftz_test_param_0];
; CHECK-FP16-NEXT:    ex2.approx.ftz.f32 %f2, %f1;
; CHECK-FP16-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-FP16-NEXT:    ret;
;
; CHECK-BF16-LABEL: exp2_ftz_test(
; CHECK-BF16:       {
; CHECK-BF16-NEXT:    .reg .f32 %f<3>;
; CHECK-BF16-EMPTY:
; CHECK-BF16-NEXT:  // %bb.0: // %entry
; CHECK-BF16-NEXT:    ld.param.f32 %f1, [exp2_ftz_test_param_0];
; CHECK-BF16-NEXT:    ex2.approx.ftz.f32 %f2, %f1;
; CHECK-BF16-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-BF16-NEXT:    ret;
entry:
  %exp2 = call float @llvm.exp2.f32(float %in)
  ret float %exp2
}

; CHECK-LABEL: exp2_test_v
define <2 x float> @exp2_test_v(<2 x float> %in) {
; CHECK-LABEL: exp2_test_v(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    ld.param.v2.f32 {%f1, %f2}, [exp2_test_v_param_0];
; CHECK-NEXT:    ex2.approx.f32 %f3, %f2;
; CHECK-NEXT:    ex2.approx.f32 %f4, %f1;
; CHECK-NEXT:    st.param.v2.f32 [func_retval0], {%f4, %f3};
; CHECK-NEXT:    ret;
;
; CHECK-FP16-LABEL: exp2_test_v(
; CHECK-FP16:       {
; CHECK-FP16-NEXT:    .reg .f32 %f<5>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT:  // %bb.0: // %entry
; CHECK-FP16-NEXT:    ld.param.v2.f32 {%f1, %f2}, [exp2_test_v_param_0];
; CHECK-FP16-NEXT:    ex2.approx.f32 %f3, %f2;
; CHECK-FP16-NEXT:    ex2.approx.f32 %f4, %f1;
; CHECK-FP16-NEXT:    st.param.v2.f32 [func_retval0], {%f4, %f3};
; CHECK-FP16-NEXT:    ret;
;
; CHECK-BF16-LABEL: exp2_test_v(
; CHECK-BF16:       {
; CHECK-BF16-NEXT:    .reg .f32 %f<5>;
; CHECK-BF16-EMPTY:
; CHECK-BF16-NEXT:  // %bb.0: // %entry
; CHECK-BF16-NEXT:    ld.param.v2.f32 {%f1, %f2}, [exp2_test_v_param_0];
; CHECK-BF16-NEXT:    ex2.approx.f32 %f3, %f2;
; CHECK-BF16-NEXT:    ex2.approx.f32 %f4, %f1;
; CHECK-BF16-NEXT:    st.param.v2.f32 [func_retval0], {%f4, %f3};
; CHECK-BF16-NEXT:    ret;
entry:
  %exp2 = call <2 x float> @llvm.exp2.v2f32(<2 x float> %in)
  ret <2 x float> %exp2
}

; --- f16 ---

; CHECK-LABEL: exp2_f16_test
define half @exp2_f16_test(half %in) {
; CHECK-LABEL: exp2_f16_test(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<3>;
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    ld.param.b16 %rs1, [exp2_f16_test_param_0];
; CHECK-NEXT:    cvt.f32.f16 %f1, %rs1;
; CHECK-NEXT:    ex2.approx.f32 %f2, %f1;
; CHECK-NEXT:    cvt.rn.f16.f32 %rs2, %f2;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs2;
; CHECK-NEXT:    ret;
;
; CHECK-FP16-LABEL: exp2_f16_test(
; CHECK-FP16:       {
; CHECK-FP16-NEXT:    .reg .b16 %rs<3>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT:  // %bb.0: // %entry
; CHECK-FP16-NEXT:    ld.param.b16 %rs1, [exp2_f16_test_param_0];
; CHECK-FP16-NEXT:    ex2.approx.f16 %rs2, %rs1;
; CHECK-FP16-NEXT:    st.param.b16 [func_retval0], %rs2;
; CHECK-FP16-NEXT:    ret;
;
; CHECK-BF16-LABEL: exp2_f16_test(
; CHECK-BF16:       {
; CHECK-BF16-NEXT:    .reg .b16 %rs<3>;
; CHECK-BF16-EMPTY:
; CHECK-BF16-NEXT:  // %bb.0: // %entry
; CHECK-BF16-NEXT:    ld.param.b16 %rs1, [exp2_f16_test_param_0];
; CHECK-BF16-NEXT:    ex2.approx.f16 %rs2, %rs1;
; CHECK-BF16-NEXT:    st.param.b16 [func_retval0], %rs2;
; CHECK-BF16-NEXT:    ret;
entry:
  %exp2 = call half @llvm.exp2.f16(half %in)
  ret half %exp2
}

; COM: we should never have .ftz for f16
; CHECK-LABEL: exp2_f16_ftz_test
define half @exp2_f16_ftz_test(half %in) #0 {
; CHECK-LABEL: exp2_f16_ftz_test(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<3>;
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    ld.param.b16 %rs1, [exp2_f16_ftz_test_param_0];
; CHECK-NEXT:    cvt.ftz.f32.f16 %f1, %rs1;
; CHECK-NEXT:    ex2.approx.ftz.f32 %f2, %f1;
; CHECK-NEXT:    cvt.rn.f16.f32 %rs2, %f2;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs2;
; CHECK-NEXT:    ret;
;
; CHECK-FP16-LABEL: exp2_f16_ftz_test(
; CHECK-FP16:       {
; CHECK-FP16-NEXT:    .reg .b16 %rs<3>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT:  // %bb.0: // %entry
; CHECK-FP16-NEXT:    ld.param.b16 %rs1, [exp2_f16_ftz_test_param_0];
; CHECK-FP16-NEXT:    ex2.approx.f16 %rs2, %rs1;
; CHECK-FP16-NEXT:    st.param.b16 [func_retval0], %rs2;
; CHECK-FP16-NEXT:    ret;
;
; CHECK-BF16-LABEL: exp2_f16_ftz_test(
; CHECK-BF16:       {
; CHECK-BF16-NEXT:    .reg .b16 %rs<3>;
; CHECK-BF16-EMPTY:
; CHECK-BF16-NEXT:  // %bb.0: // %entry
; CHECK-BF16-NEXT:    ld.param.b16 %rs1, [exp2_f16_ftz_test_param_0];
; CHECK-BF16-NEXT:    ex2.approx.f16 %rs2, %rs1;
; CHECK-BF16-NEXT:    st.param.b16 [func_retval0], %rs2;
; CHECK-BF16-NEXT:    ret;
entry:
  %exp2 = call half @llvm.exp2.f16(half %in)
  ret half %exp2
}

; CHECK-LABEL: exp2_f16_test_v
define <2 x half> @exp2_f16_test_v(<2 x half> %in) {
; CHECK-LABEL: exp2_f16_test_v(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<5>;
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .f32 %f<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    ld.param.b32 %r1, [exp2_f16_test_v_param_0];
; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
; CHECK-NEXT:    cvt.f32.f16 %f1, %rs2;
; CHECK-NEXT:    ex2.approx.f32 %f2, %f1;
; CHECK-NEXT:    cvt.rn.f16.f32 %rs3, %f2;
; CHECK-NEXT:    cvt.f32.f16 %f3, %rs1;
; CHECK-NEXT:    ex2.approx.f32 %f4, %f3;
; CHECK-NEXT:    cvt.rn.f16.f32 %rs4, %f4;
; CHECK-NEXT:    mov.b32 %r2, {%rs4, %rs3};
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
;
; CHECK-FP16-LABEL: exp2_f16_test_v(
; CHECK-FP16:       {
; CHECK-FP16-NEXT:    .reg .b32 %r<3>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT:  // %bb.0: // %entry
; CHECK-FP16-NEXT:    ld.param.b32 %r1, [exp2_f16_test_v_param_0];
; CHECK-FP16-NEXT:    ex2.approx.f16x2 %r2, %r1;
; CHECK-FP16-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-FP16-NEXT:    ret;
;
; CHECK-BF16-LABEL: exp2_f16_test_v(
; CHECK-BF16:       {
; CHECK-BF16-NEXT:    .reg .b32 %r<3>;
; CHECK-BF16-EMPTY:
; CHECK-BF16-NEXT:  // %bb.0: // %entry
; CHECK-BF16-NEXT:    ld.param.b32 %r1, [exp2_f16_test_v_param_0];
; CHECK-BF16-NEXT:    ex2.approx.f16x2 %r2, %r1;
; CHECK-BF16-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-BF16-NEXT:    ret;
entry:
  %exp2 = call <2 x half> @llvm.exp2.v2f16(<2 x half> %in)
  ret <2 x half> %exp2
}

; --- bf16 ---

; COM: we should always have .ftz for bf16
; CHECK-LABEL: exp2_bf16_test
define bfloat @exp2_bf16_test(bfloat %in) {
; CHECK-LABEL: exp2_bf16_test(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<2>;
; CHECK-NEXT:    .reg .b16 %rs<2>;
; CHECK-NEXT:    .reg .b32 %r<9>;
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    ld.param.u16 %r1, [exp2_bf16_test_param_0];
; CHECK-NEXT:    shl.b32 %r2, %r1, 16;
; CHECK-NEXT:    mov.b32 %f1, %r2;
; CHECK-NEXT:    ex2.approx.f32 %f2, %f1;
; CHECK-NEXT:    mov.b32 %r3, %f2;
; CHECK-NEXT:    bfe.u32 %r4, %r3, 16, 1;
; CHECK-NEXT:    add.s32 %r5, %r4, %r3;
; CHECK-NEXT:    add.s32 %r6, %r5, 32767;
; CHECK-NEXT:    setp.nan.f32 %p1, %f2, %f2;
; CHECK-NEXT:    or.b32 %r7, %r3, 4194304;
; CHECK-NEXT:    selp.b32 %r8, %r7, %r6, %p1;
; CHECK-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; }
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs1;
; CHECK-NEXT:    ret;
;
; CHECK-FP16-LABEL: exp2_bf16_test(
; CHECK-FP16:       {
; CHECK-FP16-NEXT:    .reg .pred %p<2>;
; CHECK-FP16-NEXT:    .reg .b16 %rs<2>;
; CHECK-FP16-NEXT:    .reg .b32 %r<9>;
; CHECK-FP16-NEXT:    .reg .f32 %f<3>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT:  // %bb.0: // %entry
; CHECK-FP16-NEXT:    ld.param.u16 %r1, [exp2_bf16_test_param_0];
; CHECK-FP16-NEXT:    shl.b32 %r2, %r1, 16;
; CHECK-FP16-NEXT:    mov.b32 %f1, %r2;
; CHECK-FP16-NEXT:    ex2.approx.f32 %f2, %f1;
; CHECK-FP16-NEXT:    mov.b32 %r3, %f2;
; CHECK-FP16-NEXT:    bfe.u32 %r4, %r3, 16, 1;
; CHECK-FP16-NEXT:    add.s32 %r5, %r4, %r3;
; CHECK-FP16-NEXT:    add.s32 %r6, %r5, 32767;
; CHECK-FP16-NEXT:    setp.nan.f32 %p1, %f2, %f2;
; CHECK-FP16-NEXT:    or.b32 %r7, %r3, 4194304;
; CHECK-FP16-NEXT:    selp.b32 %r8, %r7, %r6, %p1;
; CHECK-FP16-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; }
; CHECK-FP16-NEXT:    st.param.b16 [func_retval0], %rs1;
; CHECK-FP16-NEXT:    ret;
;
; CHECK-BF16-LABEL: exp2_bf16_test(
; CHECK-BF16:       {
; CHECK-BF16-NEXT:    .reg .b16 %rs<3>;
; CHECK-BF16-EMPTY:
; CHECK-BF16-NEXT:  // %bb.0: // %entry
; CHECK-BF16-NEXT:    ld.param.b16 %rs1, [exp2_bf16_test_param_0];
; CHECK-BF16-NEXT:    ex2.approx.ftz.bf16 %rs2, %rs1;
; CHECK-BF16-NEXT:    st.param.b16 [func_retval0], %rs2;
; CHECK-BF16-NEXT:    ret;
entry:
  %exp2 = call bfloat @llvm.exp2.bf16(bfloat %in)
  ret bfloat %exp2
}

; CHECK-LABEL: exp2_bf16_test_v
define <2 x bfloat> @exp2_bf16_test_v(<2 x bfloat> %in) {
; CHECK-LABEL: exp2_bf16_test_v(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<3>;
; CHECK-NEXT:    .reg .b16 %rs<3>;
; CHECK-NEXT:    .reg .b32 %r<19>;
; CHECK-NEXT:    .reg .f32 %f<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    ld.param.b32 %r1, [exp2_bf16_test_v_param_0];
; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
; CHECK-NEXT:    cvt.u32.u16 %r2, %rs2;
; CHECK-NEXT:    shl.b32 %r3, %r2, 16;
; CHECK-NEXT:    mov.b32 %f1, %r3;
; CHECK-NEXT:    ex2.approx.f32 %f2, %f1;
; CHECK-NEXT:    mov.b32 %r4, %f2;
; CHECK-NEXT:    bfe.u32 %r5, %r4, 16, 1;
; CHECK-NEXT:    add.s32 %r6, %r5, %r4;
; CHECK-NEXT:    add.s32 %r7, %r6, 32767;
; CHECK-NEXT:    setp.nan.f32 %p1, %f2, %f2;
; CHECK-NEXT:    or.b32 %r8, %r4, 4194304;
; CHECK-NEXT:    selp.b32 %r9, %r8, %r7, %p1;
; CHECK-NEXT:    cvt.u32.u16 %r10, %rs1;
; CHECK-NEXT:    shl.b32 %r11, %r10, 16;
; CHECK-NEXT:    mov.b32 %f3, %r11;
; CHECK-NEXT:    ex2.approx.f32 %f4, %f3;
; CHECK-NEXT:    mov.b32 %r12, %f4;
; CHECK-NEXT:    bfe.u32 %r13, %r12, 16, 1;
; CHECK-NEXT:    add.s32 %r14, %r13, %r12;
; CHECK-NEXT:    add.s32 %r15, %r14, 32767;
; CHECK-NEXT:    setp.nan.f32 %p2, %f4, %f4;
; CHECK-NEXT:    or.b32 %r16, %r12, 4194304;
; CHECK-NEXT:    selp.b32 %r17, %r16, %r15, %p2;
; CHECK-NEXT:    prmt.b32 %r18, %r17, %r9, 0x7632U;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r18;
; CHECK-NEXT:    ret;
;
; CHECK-FP16-LABEL: exp2_bf16_test_v(
; CHECK-FP16:       {
; CHECK-FP16-NEXT:    .reg .pred %p<3>;
; CHECK-FP16-NEXT:    .reg .b16 %rs<3>;
; CHECK-FP16-NEXT:    .reg .b32 %r<19>;
; CHECK-FP16-NEXT:    .reg .f32 %f<5>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT:  // %bb.0: // %entry
; CHECK-FP16-NEXT:    ld.param.b32 %r1, [exp2_bf16_test_v_param_0];
; CHECK-FP16-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
; CHECK-FP16-NEXT:    cvt.u32.u16 %r2, %rs2;
; CHECK-FP16-NEXT:    shl.b32 %r3, %r2, 16;
; CHECK-FP16-NEXT:    mov.b32 %f1, %r3;
; CHECK-FP16-NEXT:    ex2.approx.f32 %f2, %f1;
; CHECK-FP16-NEXT:    mov.b32 %r4, %f2;
; CHECK-FP16-NEXT:    bfe.u32 %r5, %r4, 16, 1;
; CHECK-FP16-NEXT:    add.s32 %r6, %r5, %r4;
; CHECK-FP16-NEXT:    add.s32 %r7, %r6, 32767;
; CHECK-FP16-NEXT:    setp.nan.f32 %p1, %f2, %f2;
; CHECK-FP16-NEXT:    or.b32 %r8, %r4, 4194304;
; CHECK-FP16-NEXT:    selp.b32 %r9, %r8, %r7, %p1;
; CHECK-FP16-NEXT:    cvt.u32.u16 %r10, %rs1;
; CHECK-FP16-NEXT:    shl.b32 %r11, %r10, 16;
; CHECK-FP16-NEXT:    mov.b32 %f3, %r11;
; CHECK-FP16-NEXT:    ex2.approx.f32 %f4, %f3;
; CHECK-FP16-NEXT:    mov.b32 %r12, %f4;
; CHECK-FP16-NEXT:    bfe.u32 %r13, %r12, 16, 1;
; CHECK-FP16-NEXT:    add.s32 %r14, %r13, %r12;
; CHECK-FP16-NEXT:    add.s32 %r15, %r14, 32767;
; CHECK-FP16-NEXT:    setp.nan.f32 %p2, %f4, %f4;
; CHECK-FP16-NEXT:    or.b32 %r16, %r12, 4194304;
; CHECK-FP16-NEXT:    selp.b32 %r17, %r16, %r15, %p2;
; CHECK-FP16-NEXT:    prmt.b32 %r18, %r17, %r9, 0x7632U;
; CHECK-FP16-NEXT:    st.param.b32 [func_retval0], %r18;
; CHECK-FP16-NEXT:    ret;
;
; CHECK-BF16-LABEL: exp2_bf16_test_v(
; CHECK-BF16:       {
; CHECK-BF16-NEXT:    .reg .b32 %r<3>;
; CHECK-BF16-EMPTY:
; CHECK-BF16-NEXT:  // %bb.0: // %entry
; CHECK-BF16-NEXT:    ld.param.b32 %r1, [exp2_bf16_test_v_param_0];
; CHECK-BF16-NEXT:    ex2.approx.ftz.bf16x2 %r2, %r1;
; CHECK-BF16-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-BF16-NEXT:    ret;
entry:
  %exp2 = call <2 x bfloat> @llvm.exp2.v2bf16(<2 x bfloat> %in)
  ret <2 x bfloat> %exp2
}

declare float @llvm.exp2.f32(float %val)

declare <2 x float> @llvm.exp2.v2f32(<2 x float> %val)

declare half @llvm.exp2.f16(half %val)

declare <2 x half> @llvm.exp2.v2f16(<2 x half> %val)

declare bfloat @llvm.exp2.bf16(bfloat %val)

declare <2 x bfloat> @llvm.exp2.v2bf16(<2 x bfloat> %val)

; Attribute group used by the *_ftz_* functions above to request
; sign-preserving denormal flushing.
attributes #0 = {"denormal-fp-math"="preserve-sign"}