; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5

; Checks how the NVPTX backend lowers llvm.fabs on half and <2 x half> across
; four configurations. Three of them share the CHECK-NOF16 prefix (no usable
; f16 abs); the last one uses the CHECK-F16-ABS prefix (native f16 abs).
; Each configuration is additionally fed to ptxas when it is available.

; ## FP16 abs is not supported by PTX version (PTX < 65).
; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx60 \
; RUN:   -O0 -disable-post-ra -verify-machineinstrs \
; RUN: | FileCheck -check-prefix CHECK-NOF16 %s
; RUN: %if ptxas %{ \
; RUN:   llc < %s -mcpu=sm_53 -mattr=+ptx60 \
; RUN:     -O0 -disable-post-ra -verify-machineinstrs \
; RUN:   | %ptxas-verify -arch=sm_53 \
; RUN: %}

; ## FP16 support explicitly disabled (--nvptx-no-f16-math).
; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx65 --nvptx-no-f16-math \
; RUN:   -O0 -disable-post-ra -verify-machineinstrs \
; RUN: | FileCheck -check-prefix CHECK-NOF16 %s
; RUN: %if ptxas %{ \
; RUN:   llc < %s -mcpu=sm_53 -mattr=+ptx65 --nvptx-no-f16-math \
; RUN:     -O0 -disable-post-ra -verify-machineinstrs \
; RUN:   | %ptxas-verify -arch=sm_53 \
; RUN: %}

; ## FP16 is not supported by hardware (SM < 53).
; RUN: llc < %s -mcpu=sm_52 -mattr=+ptx65 \
; RUN:   -O0 -disable-post-ra -verify-machineinstrs \
; RUN: | FileCheck -check-prefix CHECK-NOF16 %s
; RUN: %if ptxas %{ \
; RUN:   llc < %s -mcpu=sm_52 -mattr=+ptx65 \
; RUN:     -O0 -disable-post-ra -verify-machineinstrs \
; RUN:   | %ptxas-verify -arch=sm_52 \
; RUN: %}

; ## Full FP16 abs support.
; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx65 \
; RUN:   -O0 -disable-post-ra -verify-machineinstrs \
; RUN: | FileCheck -check-prefix CHECK-F16-ABS %s
; RUN: %if ptxas %{ \
; RUN:   llc < %s -mcpu=sm_53 -mattr=+ptx65 \
; RUN:     -O0 -disable-post-ra -verify-machineinstrs \
; RUN:   | %ptxas-verify -arch=sm_53 \
; RUN: %}

target triple = "nvptx64-nvidia-cuda"

declare half @llvm.fabs.f16(half %a)
declare <2 x half> @llvm.fabs.v2f16(<2 x half> %a)

; Scalar case: without f16 abs the value is widened to f32, abs.f32 is applied
; and the result is rounded back to f16; with f16 abs a single abs.f16 is used.
define half @test_fabs(half %a) {
; CHECK-NOF16-LABEL: test_fabs(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .b16 %rs<3>;
; CHECK-NOF16-NEXT:    .reg .f32 %f<3>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT:  // %bb.0:
; CHECK-NOF16-NEXT:    ld.param.b16 %rs1, [test_fabs_param_0];
; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs1;
; CHECK-NOF16-NEXT:    abs.f32 %f2, %f1;
; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs2, %f2;
; CHECK-NOF16-NEXT:    st.param.b16 [func_retval0], %rs2;
; CHECK-NOF16-NEXT:    ret;
;
; CHECK-F16-ABS-LABEL: test_fabs(
; CHECK-F16-ABS:       {
; CHECK-F16-ABS-NEXT:    .reg .b16 %rs<3>;
; CHECK-F16-ABS-EMPTY:
; CHECK-F16-ABS-NEXT:  // %bb.0:
; CHECK-F16-ABS-NEXT:    ld.param.b16 %rs1, [test_fabs_param_0];
; CHECK-F16-ABS-NEXT:    abs.f16 %rs2, %rs1;
; CHECK-F16-ABS-NEXT:    st.param.b16 [func_retval0], %rs2;
; CHECK-F16-ABS-NEXT:    ret;
  %r = call half @llvm.fabs.f16(half %a)
  ret half %r
}

; Packed case: with f16 abs a single abs.f16x2 is expected.
;
; FIXME: the first group of checks below uses the CHECK-F16 prefix, which is
; not enabled by any of the llc invocations at the top of this file, so
; FileCheck never evaluates it and the three no-f16 configurations currently
; verify nothing for this function. The group (a sign-bit mask, 2147450879 ==
; 0x7FFF7FFF, applied to both halves) looks like a leftover from an earlier
; prefix scheme. Presumably the no-f16 configurations disagree on the output
; for this function, which would stop the update script from emitting a shared
; group; confirm, give those configurations distinct prefixes if needed, and
; regenerate with utils/update_llc_test_checks.py.
define <2 x half> @test_fabs_2(<2 x half> %a) #0 {
; CHECK-F16-LABEL: test_fabs_2(
; CHECK-F16:       {
; CHECK-F16-NEXT:    .reg .b32 %r<5>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT:  // %bb.0:
; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fabs_2_param_0];
; CHECK-F16-NEXT:    and.b32 %r3, %r1, 2147450879;
; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-F16-NEXT:    ret;
;
; CHECK-F16-ABS-LABEL: test_fabs_2(
; CHECK-F16-ABS:       {
; CHECK-F16-ABS-NEXT:    .reg .b32 %r<3>;
; CHECK-F16-ABS-EMPTY:
; CHECK-F16-ABS-NEXT:  // %bb.0:
; CHECK-F16-ABS-NEXT:    ld.param.b32 %r1, [test_fabs_2_param_0];
; CHECK-F16-ABS-NEXT:    abs.f16x2 %r2, %r1;
; CHECK-F16-ABS-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-F16-ABS-NEXT:    ret;
  %r = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a)
  ret <2 x half> %r
}
