; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mcpu=sm_75 -mattr=+ptx70 | FileCheck --check-prefixes=CHECK-FP16 %s
; RUN: %if ptxas-11.0 %{ llc < %s -mcpu=sm_75 -mattr=+ptx70 | %ptxas-verify -arch=sm_75 %}

; Checks the PTX emitted for the half-precision ex2.approx NVVM intrinsics
; (scalar and two-element vector forms) when compiling for sm_75 with PTX 7.0.
; The second invocation above additionally feeds the generated PTX to ptxas
; when ptxas 11.0 is available.

target triple = "nvptx64-nvidia-cuda"

declare half @llvm.nvvm.ex2.approx.f16(half)
declare <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half>)

; Scalar form: the half argument is loaded into a 16-bit register, passed
; through a single ex2.approx.f16 instruction, and stored as the return value.
define half @ex2_half(half %0) {
; CHECK-FP16-LABEL: ex2_half(
; CHECK-FP16:       {
; CHECK-FP16-NEXT:    .reg .b16 %rs<3>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT:  // %bb.0:
; CHECK-FP16-NEXT:    ld.param.b16 %rs1, [ex2_half_param_0];
; CHECK-FP16-NEXT:    ex2.approx.f16 %rs2, %rs1;
; CHECK-FP16-NEXT:    st.param.b16 [func_retval0], %rs2;
; CHECK-FP16-NEXT:    ret;
  %res = call half @llvm.nvvm.ex2.approx.f16(half %0)
  ret half %res
}

; Vector form: the <2 x half> argument is loaded into one 32-bit register,
; passed through a single ex2.approx.f16x2 instruction, and stored as the
; return value.
define <2 x half> @ex2_2xhalf(<2 x half> %0) {
; CHECK-FP16-LABEL: ex2_2xhalf(
; CHECK-FP16:       {
; CHECK-FP16-NEXT:    .reg .b32 %r<3>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT:  // %bb.0:
; CHECK-FP16-NEXT:    ld.param.b32 %r1, [ex2_2xhalf_param_0];
; CHECK-FP16-NEXT:    ex2.approx.f16x2 %r2, %r1;
; CHECK-FP16-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-FP16-NEXT:    ret;
  %res = call <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half> %0)
  ret <2 x half> %res
}