; Scalar half-precision (f16) code generation test for NVPTX.
; Every configuration below invokes llc twice with the same options: once
; piped into FileCheck, and once (only when ptxas is available) piped into
; ptxas so the generated PTX is also validated by the assembler.

; ## Full FP16 support enabled by default.
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: -mattr=+ptx60 \
; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-F16-NOFTZ %s
; RUN: %if ptxas %{ \
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: -mattr=+ptx60 \
; RUN: | %ptxas-verify -arch=sm_53 \
; RUN: %}
; ## Full FP16 with FTZ
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: -denormal-fp-math-f32=preserve-sign -mattr=+ptx60 \
; RUN: | FileCheck -check-prefixes CHECK,CHECK-F16-FTZ %s
; RUN: %if ptxas %{ \
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: -denormal-fp-math-f32=preserve-sign -mattr=+ptx60 \
; RUN: | %ptxas-verify -arch=sm_53 \
; RUN: %}
; ## FP16 support explicitly disabled.
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
; RUN: -verify-machineinstrs -mattr=+ptx60 \
; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-NOF16 %s
; RUN: %if ptxas %{ \
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
; RUN: -verify-machineinstrs -mattr=+ptx60 \
; RUN: | %ptxas-verify -arch=sm_53 \
; RUN: %}
; ## FP16 is not supported by hardware.
; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
; RUN: -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-NOF16 %s
; RUN: %if ptxas %{ \
; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
; RUN: -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: | %ptxas-verify -arch=sm_52 \
; RUN: %}

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

; A half constant is expected to be materialized as a 16-bit immediate move.
; CHECK-LABEL: test_ret_const(
; CHECK: mov.b16 [[R:%rs[0-9]+]], 0x3C00;
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_ret_const() #0 {
  ret half 1.0
}

; Scalar arithmetic. With native f16 math the operation is expected to be
; emitted directly on 16-bit registers (with .ftz in the FTZ configuration);
; without it the operands are widened to f32, operated on, and rounded back.

; CHECK-LABEL: test_fadd(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fadd_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_param_1];
; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]];
; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_fadd(half %a, half %b) #0 {
  %r = fadd half %a, %b
  ret half %r
}

; A single-element vector is expected to lower exactly like the scalar case.
; CHECK-LABEL: test_fadd_v1f16(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fadd_v1f16_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_v1f16_param_1];
; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]];
; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 {
  %r = fadd <1 x half> %a, %b
  ret <1 x half> %r
}

; Check that we can lower fadd with immediate arguments.
; CHECK-LABEL: test_fadd_imm_0(
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_imm_0_param_0];
; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00;
; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[A]];
; CHECK-F16-FTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00;
; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_fadd_imm_0(half %b) #0 {
  %r = fadd half 1.0, %b
  ret half %r
}

; CHECK-LABEL: test_fadd_imm_1(
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_imm_1_param_0];
; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00;
; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[A]];
; CHECK-F16-FTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00;
; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_fadd_imm_1(half %a) #0 {
  %r = fadd half %a, 1.0
  ret half %r
}

; CHECK-LABEL: test_fsub(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fsub_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fsub_param_1];
; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]];
; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_fsub(half %a, half %b) #0 {
  %r = fsub half %a, %b
  ret half %r
}

; Negation spelled as "0.0 - x" is expected to stay a real subtraction.
; CHECK-LABEL: test_old_fneg(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_old_fneg_param_0];
; CHECK-F16-NOFTZ-NEXT: mov.b16 [[Z:%rs[0-9]+]], 0x0000
; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%rs[0-9]+]], [[Z]], [[A]];
; CHECK-F16-FTZ-NEXT: mov.b16 [[Z:%rs[0-9]+]], 0x0000
; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%rs[0-9]+]], [[Z]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000;
; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[Z]], [[A32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_old_fneg(half %a) #0 {
  %r = fsub half 0.0, %a
  ret half %r
}

; The fneg instruction is expected to become neg.f16, or a sign-bit xor when
; f16 math is unavailable.
; CHECK-LABEL: test_fneg(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fneg_param_0];
; CHECK-F16-NOFTZ-NEXT: neg.f16 [[R:%rs[0-9]+]], [[A]];
; CHECK-F16-FTZ-NEXT: neg.ftz.f16 [[R:%rs[0-9]+]], [[A]];
; CHECK-NOF16-NEXT: xor.b16 [[R:%rs[0-9]+]], [[A]], -32768;
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_fneg(half %a) #0 {
  %r = fneg half %a
  ret half %r
}

; CHECK-LABEL: test_fmul(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fmul_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fmul_param_1];
; CHECK-F16-NOFTZ-NEXT: mul.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]];
; CHECK-F16-FTZ-NEXT: mul.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: mul.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_fmul(half %a, half %b) #0 {
  %r = fmul half %a, %b
  ret half %r
}

; Division is expected to go through f32 in every configuration checked here.
; CHECK-LABEL: test_fdiv(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fdiv_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fdiv_param_1];
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[F0:%f[0-9]+]], [[A]];
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[F1:%f[0-9]+]], [[B]];
; CHECK-NOFTZ-NEXT: div.rn.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[F0:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[F1:%f[0-9]+]], [[B]];
; CHECK-F16-FTZ-NEXT: div.rn.ftz.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]];
; CHECK-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[FR]];
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_fdiv(half %a, half %b) #0 {
  %r = fdiv half %a, %b
  ret half %r
}

; Remainder is expected to expand in f32 to a - trunc(a / b) * b, with a final
; select that returns a when b is infinite.
; CHECK-LABEL: test_frem(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_frem_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_frem_param_1];
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[FA:%f[0-9]+]], [[A]];
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[FB:%f[0-9]+]], [[B]];
; CHECK-NOFTZ-NEXT: div.rn.f32 [[D:%f[0-9]+]], [[FA]], [[FB]];
; CHECK-NOFTZ-NEXT: cvt.rzi.f32.f32 [[DI:%f[0-9]+]], [[D]];
; CHECK-NOFTZ-NEXT: mul.f32 [[RI:%f[0-9]+]], [[DI]], [[FB]];
; CHECK-NOFTZ-NEXT: sub.f32 [[RF:%f[0-9]+]], [[FA]], [[RI]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[FA:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[FB:%f[0-9]+]], [[B]];
; CHECK-F16-FTZ-NEXT: div.rn.ftz.f32 [[D:%f[0-9]+]], [[FA]], [[FB]];
; CHECK-F16-FTZ-NEXT: cvt.rzi.ftz.f32.f32 [[DI:%f[0-9]+]], [[D]];
; CHECK-F16-FTZ-NEXT: mul.ftz.f32 [[RI:%f[0-9]+]], [[DI]], [[FB]];
; CHECK-F16-FTZ-NEXT: sub.ftz.f32 [[RF:%f[0-9]+]], [[FA]], [[RI]];
; CHECK-NEXT: testp.infinite.f32 [[ISBINF:%p[0-9]+]], [[FB]];
; CHECK-NEXT: selp.f32 [[RESULT:%f[0-9]+]], [[FA]], [[RF]], [[ISBINF]];
; CHECK-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RESULT]];
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_frem(half %a, half %b) #0 {
  %r = frem half %a, %b
  ret half %r
}

; Memory access: half values are expected to move through 16-bit loads/stores.

; CHECK-LABEL: test_store(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_store_param_0];
; CHECK-DAG: ld.param.u64 %[[PTR:rd[0-9]+]], [test_store_param_1];
; CHECK-NEXT: st.b16 [%[[PTR]]], [[A]];
; CHECK-NEXT: ret;
define void @test_store(half %a, ptr %b) #0 {
  store half %a, ptr %b
  ret void
}

; CHECK-LABEL: test_load(
; CHECK: ld.param.u64 %[[PTR:rd[0-9]+]], [test_load_param_0];
; CHECK-NEXT: ld.b16 [[R:%rs[0-9]+]], [%[[PTR]]];
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_load(ptr %a) #0 {
  %r = load half, ptr %a
  ret half %r
}

; A byte-aligned half copy is expected to be split into two byte transfers.
; CHECK-LABEL: .visible .func test_halfp0a1(
; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_halfp0a1_param_0];
; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_halfp0a1_param_1];
; CHECK-DAG: ld.u8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
; CHECK-DAG: st.u8 [%[[TO]]], [[B0]]
; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
; CHECK-DAG: st.u8 [%[[TO]]+1], [[B1]]
; CHECK: ret
define void @test_halfp0a1(ptr noalias readonly %from, ptr %to) {
  %1 = load half, ptr %from , align 1
  store half %1, ptr %to , align 1
  ret void
}

; Calls: half arguments and results are expected to travel through 2-byte,
; 2-aligned .param slots.

declare half @test_callee(half %a, half %b) #0

; CHECK-LABEL: test_call(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_call_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_call_param_1];
; CHECK: {
; CHECK-DAG: .param .align 2 .b8 param0[2];
; CHECK-DAG: .param .align 2 .b8 param1[2];
; CHECK-DAG: st.param.b16 [param0], [[A]];
; CHECK-DAG: st.param.b16 [param1], [[B]];
; CHECK-DAG: .param .align 2 .b8 retval0[2];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_call(half %a, half %b) #0 {
  %r = call half @test_callee(half %a, half %b)
  ret half %r
}

; CHECK-LABEL: test_call_flipped(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_call_flipped_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_call_flipped_param_1];
; CHECK: {
; CHECK-DAG: .param .align 2 .b8 param0[2];
; CHECK-DAG: .param .align 2 .b8 param1[2];
; CHECK-DAG: st.param.b16 [param0], [[B]];
; CHECK-DAG: st.param.b16 [param1], [[A]];
; CHECK-DAG: .param .align 2 .b8 retval0[2];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_call_flipped(half %a, half %b) #0 {
  %r = call half @test_callee(half %b, half %a)
  ret half %r
}

; A tail call is expected to be emitted as an ordinary call followed by ret.
; CHECK-LABEL: test_tailcall_flipped(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_tailcall_flipped_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_tailcall_flipped_param_1];
; CHECK: {
; CHECK-DAG: .param .align 2 .b8 param0[2];
; CHECK-DAG: .param .align 2 .b8 param1[2];
; CHECK-DAG: st.param.b16 [param0], [[B]];
; CHECK-DAG: st.param.b16 [param1], [[A]];
; CHECK-DAG: .param .align 2 .b8 retval0[2];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_tailcall_flipped(half %a, half %b) #0 {
  %r = tail call half @test_callee(half %b, half %a)
  ret half %r
}

; Selects: a half select is expected to become selp.b16 on a predicate.

; CHECK-LABEL: test_select(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_param_1];
; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
; CHECK-NEXT: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]];
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
  %r = select i1 %c, half %a, half %b
  ret half %r
}

; The FTZ configuration binds PRED here as well, so the selp check below
; never relies on a capture left over from an earlier function.
; CHECK-LABEL: test_select_cc(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_cc_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_cc_param_1];
; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_select_cc_param_2];
; CHECK-DAG: ld.param.b16 [[D:%rs[0-9]+]], [test_select_cc_param_3];
; CHECK-F16-NOFTZ: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]]
; CHECK-F16-FTZ: setp.neu.ftz.f16 [[PRED:%p[0-9]+]], [[C]], [[D]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]]
; CHECK: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]];
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
  %cc = fcmp une half %c, %d
  %r = select i1 %cc, half %a, half %b
  ret half %r
}

; CHECK-LABEL: test_select_cc_f32_f16(
; CHECK-DAG: ld.param.f32 [[A:%f[0-9]+]], [test_select_cc_f32_f16_param_0];
; CHECK-DAG: ld.param.f32 [[B:%f[0-9]+]], [test_select_cc_f32_f16_param_1];
; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_select_cc_f32_f16_param_2];
; CHECK-DAG: ld.param.b16 [[D:%rs[0-9]+]], [test_select_cc_f32_f16_param_3];
; CHECK-F16-NOFTZ: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]]
; CHECK-F16-FTZ: setp.neu.ftz.f16 [[PRED:%p[0-9]+]], [[C]], [[D]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]]
; CHECK-NEXT: selp.f32 [[R:%f[0-9]+]], [[A]], [[B]], [[PRED]];
; CHECK-NEXT: st.param.f32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 {
  %cc = fcmp une half %c, %d
  %r = select i1 %cc, float %a, float %b
  ret float %r
}

; CHECK-LABEL: test_select_cc_f16_f32(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_cc_f16_f32_param_0];
; CHECK-DAG: ld.param.f32 [[C:%f[0-9]+]], [test_select_cc_f16_f32_param_2];
; CHECK-DAG: ld.param.f32 [[D:%f[0-9]+]], [test_select_cc_f16_f32_param_3];
; CHECK-NOFTZ-DAG: setp.neu.f32 [[PRED:%p[0-9]+]], [[C]], [[D]]
; CHECK-F16-FTZ-DAG: setp.neu.ftz.f32 [[PRED:%p[0-9]+]], [[C]], [[D]]
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_cc_f16_f32_param_1];
; CHECK-NEXT: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]];
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
  %cc = fcmp une float %c, %d
  %r = select i1 %cc, half %a, half %b
  ret half %r
}

; Comparisons: every fcmp predicate is expected to map onto the matching setp
; comparison (on f16 natively, on f32 after widening otherwise); the i1 result
; is materialized with selp.u32 and returned as a 32-bit value.

; CHECK-LABEL: test_fcmp_une(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_une_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_une_param_1];
; CHECK-F16-NOFTZ: setp.neu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.neu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_une(half %a, half %b) #0 {
  %r = fcmp une half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_ueq(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ueq_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ueq_param_1];
; CHECK-F16-NOFTZ: setp.equ.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.equ.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.equ.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ueq(half %a, half %b) #0 {
  %r = fcmp ueq half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_ugt(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ugt_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ugt_param_1];
; CHECK-F16-NOFTZ: setp.gtu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.gtu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.gtu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ugt(half %a, half %b) #0 {
  %r = fcmp ugt half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_uge(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_uge_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_uge_param_1];
; CHECK-F16-NOFTZ: setp.geu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.geu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.geu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_uge(half %a, half %b) #0 {
  %r = fcmp uge half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_ult(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ult_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ult_param_1];
; CHECK-F16-NOFTZ: setp.ltu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.ltu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.ltu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ult(half %a, half %b) #0 {
  %r = fcmp ult half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_ule(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ule_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ule_param_1];
; CHECK-F16-NOFTZ: setp.leu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.leu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.leu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ule(half %a, half %b) #0 {
  %r = fcmp ule half %a, %b
  ret i1 %r
}


; CHECK-LABEL: test_fcmp_uno(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_uno_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_uno_param_1];
; CHECK-F16-NOFTZ: setp.nan.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.nan.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.nan.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_uno(half %a, half %b) #0 {
  %r = fcmp uno half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_one(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_one_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_one_param_1];
; CHECK-F16-NOFTZ: setp.ne.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.ne.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.ne.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_one(half %a, half %b) #0 {
  %r = fcmp one half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_oeq(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_oeq_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_oeq_param_1];
; CHECK-F16-NOFTZ: setp.eq.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.eq.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.eq.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_oeq(half %a, half %b) #0 {
  %r = fcmp oeq half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_ogt(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ogt_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ogt_param_1];
; CHECK-F16-NOFTZ: setp.gt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.gt.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.gt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ogt(half %a, half %b) #0 {
  %r = fcmp ogt half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_oge(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_oge_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_oge_param_1];
; CHECK-F16-NOFTZ: setp.ge.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.ge.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.ge.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_oge(half %a, half %b) #0 {
  %r = fcmp oge half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_olt(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_olt_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_olt_param_1];
; CHECK-F16-NOFTZ: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.lt.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.lt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_olt(half %a, half %b) #0 {
  %r = fcmp olt half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_ole(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ole_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ole_param_1];
; CHECK-F16-NOFTZ: setp.le.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.le.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.le.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ole(half %a, half %b) #0 {
  %r = fcmp ole half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_ord(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ord_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ord_param_1];
; CHECK-F16-NOFTZ: setp.num.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.num.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.num.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ord(half %a, half %b) #0 {
  %r = fcmp ord half %a, %b
  ret i1 %r
}

; Control flow. The IR branches on "uge"; the expected PTX tests the inverse
; ordered "lt" predicate and branches on that.
; CHECK-LABEL: test_br_cc(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_br_cc_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_br_cc_param_1];
; CHECK-DAG: ld.param.u64 %[[C:rd[0-9]+]], [test_br_cc_param_2];
; CHECK-DAG: ld.param.u64 %[[D:rd[0-9]+]], [test_br_cc_param_3];
; CHECK-F16-NOFTZ: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.lt.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.lt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: @[[PRED]] bra [[LABEL:\$L__BB.*]];
; CHECK: st.u32 [%[[C]]],
; CHECK: [[LABEL]]:
; CHECK: st.u32 [%[[D]]],
; CHECK: ret;
define void @test_br_cc(half %a, half %b, ptr %p1, ptr %p2) #0 {
  %c = fcmp uge half %a, %b
  br i1 %c, label %then, label %else
then:
  store i32 0, ptr %p1
  ret void
else:
  store i32 0, ptr %p2
  ret void
}

; A half-typed phi carried around a loop that contains a call.
; CHECK-LABEL: test_phi(
; CHECK: ld.param.u64 %[[P1:rd[0-9]+]], [test_phi_param_0];
; CHECK: ld.b16 {{%rs[0-9]+}}, [%[[P1]]];
; CHECK: [[LOOP:\$L__BB[0-9_]+]]:
; CHECK: mov.u16 [[R:%rs[0-9]+]], [[AB:%rs[0-9]+]];
; CHECK: ld.b16 [[AB:%rs[0-9]+]], [%[[P1]]];
; CHECK: {
; CHECK: st.param.b64 [param0], %[[P1]];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_dummy
; CHECK: }
; CHECK: setp.eq.b32 [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 1;
; CHECK: @[[PRED]] bra [[LOOP]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_phi(ptr %p1) #0 {
entry:
  %a = load half, ptr %p1
  br label %loop
loop:
  %r = phi half [%a, %entry], [%b, %loop]
  %b = load half, ptr %p1
  %c = call i1 @test_dummy(ptr %p1)
  br i1 %c, label %loop, label %return
return:
  ret half %r
}
declare i1 @test_dummy(ptr %p1) #0

; Conversions between half and integer types are expected to be single cvt
; instructions in every configuration.

; CHECK-LABEL: test_fptosi_i32(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptosi_i32_param_0];
; CHECK: cvt.rzi.s32.f16 [[R:%r[0-9]+]], [[A]];
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK: ret;
define i32 @test_fptosi_i32(half %a) #0 {
  %r = fptosi half %a to i32
  ret i32 %r
}

; CHECK-LABEL: test_fptosi_i64(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptosi_i64_param_0];
; CHECK: cvt.rzi.s64.f16 [[R:%rd[0-9]+]], [[A]];
; CHECK: st.param.b64 [func_retval0], [[R]];
; CHECK: ret;
define i64 @test_fptosi_i64(half %a) #0 {
  %r = fptosi half %a to i64
  ret i64 %r
}

; CHECK-LABEL: test_fptoui_i32(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptoui_i32_param_0];
; CHECK: cvt.rzi.u32.f16 [[R:%r[0-9]+]], [[A]];
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK: ret;
define i32 @test_fptoui_i32(half %a) #0 {
  %r = fptoui half %a to i32
  ret i32 %r
}

; CHECK-LABEL: test_fptoui_i64(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptoui_i64_param_0];
; CHECK: cvt.rzi.u64.f16 [[R:%rd[0-9]+]], [[A]];
; CHECK: st.param.b64 [func_retval0], [[R]];
; CHECK: ret;
define i64 @test_fptoui_i64(half %a) #0 {
  %r = fptoui half %a to i64
  ret i64 %r
}

; CHECK-LABEL: test_uitofp_i32(
; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_param_0];
; CHECK: cvt.rn.f16.u32 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_uitofp_i32(i32 %a) #0 {
  %r = uitofp i32 %a to half
  ret half %r
}

; CHECK-LABEL: test_uitofp_i64(
; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_uitofp_i64_param_0];
; CHECK: cvt.rn.f16.u64 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_uitofp_i64(i64 %a) #0 {
  %r = uitofp i64 %a to half
  ret half %r
}

; CHECK-LABEL: test_sitofp_i32(
; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_param_0];
; CHECK: cvt.rn.f16.s32 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_sitofp_i32(i32 %a) #0 {
  %r = sitofp i32 %a to half
  ret half %r
}

; CHECK-LABEL: test_sitofp_i64(
; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_sitofp_i64_param_0];
; CHECK: cvt.rn.f16.s64 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_sitofp_i64(i64 %a) #0 {
  %r = sitofp i64 %a to half
  ret half %r
}

; CHECK-LABEL: test_uitofp_i32_fadd(
; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_fadd_param_0];
; CHECK-DAG: cvt.rn.f16.u32 [[C:%rs[0-9]+]], [[A]];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_uitofp_i32_fadd_param_1];
; CHECK-F16-NOFTZ: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[C]];
; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[C]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {
  %c = uitofp i32 %a to half
  %r = fadd half %b, %c
  ret half %r
}

; NOTE(review): the four no-f16-math expectations below carry an X prefix, so
; FileCheck ignores them; confirm whether they can be re-enabled.
; CHECK-LABEL: test_sitofp_i32_fadd(
; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_fadd_param_0];
; CHECK-DAG: cvt.rn.f16.s32 [[C:%rs[0-9]+]], [[A]];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_sitofp_i32_fadd_param_1];
; CHECK-F16-NOFTZ: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[C]];
; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[C]];
; XCHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; XCHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
; XCHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]];
; XCHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
  %c = sitofp i32 %a to half
  %r = fadd half %b, %c
  ret half %r
}

; Conversions between half and wider floating-point types.

; CHECK-LABEL: test_fptrunc_float(
; CHECK: ld.param.f32 [[A:%f[0-9]+]], [test_fptrunc_float_param_0];
; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_fptrunc_float(float %a) #0 {
  %r = fptrunc float %a to half
  ret half %r
}

; CHECK-LABEL: test_fptrunc_double(
; CHECK: ld.param.f64 [[A:%fd[0-9]+]], [test_fptrunc_double_param_0];
; CHECK: cvt.rn.f16.f64 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_fptrunc_double(double %a) #0 {
  %r = fptrunc double %a to half
  ret half %r
}

; CHECK-LABEL: test_fpext_float(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fpext_float_param_0];
; CHECK-NOFTZ: cvt.f32.f16 [[R:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[R:%f[0-9]+]], [[A]];
; CHECK: st.param.f32 [func_retval0], [[R]];
; CHECK: ret;
define float @test_fpext_float(half %a) #0 {
  %r = fpext half %a to float
  ret float %r
}

; CHECK-LABEL: test_fpext_double(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fpext_double_param_0];
; CHECK: cvt.f64.f16 [[R:%fd[0-9]+]], [[A]];
; CHECK: st.param.f64 [func_retval0], [[R]];
; CHECK: ret;
define double @test_fpext_double(half %a) #0 {
  %r = fpext half %a to double
  ret double %r
}


; Bitcasts between half and i16 are expected to need no conversion beyond the
; widening of the i16 return value to 32 bits.

; CHECK-LABEL: test_bitcast_halftoi16(
; CHECK: ld.param.b16 [[AH:%rs[0-9]+]], [test_bitcast_halftoi16_param_0];
; CHECK: cvt.u32.u16 [[R:%r[0-9]+]], [[AH]]
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK: ret;
define i16 @test_bitcast_halftoi16(half %a) #0 {
  %r = bitcast half %a to i16
  ret i16 %r
}

; CHECK-LABEL: test_bitcast_i16tohalf(
; CHECK: ld.param.u16 [[AS:%rs[0-9]+]], [test_bitcast_i16tohalf_param_0];
; CHECK: st.param.b16 [func_retval0], [[AS]];
; CHECK: ret;
define half @test_bitcast_i16tohalf(i16 %a) #0 {
  %r = bitcast i16 %a to half
  ret half %r
}


; Intrinsic declarations (this part of the file only exercises llvm.sqrt.f16).
declare half @llvm.sqrt.f16(half %a) #0
declare half @llvm.powi.f16.i32(half %a, i32 %b) #0
declare half @llvm.sin.f16(half %a) #0
declare half @llvm.cos.f16(half %a) #0
declare half @llvm.pow.f16(half %a, half %b) #0
declare half @llvm.exp.f16(half %a) #0
declare half @llvm.exp2.f16(half %a) #0
declare half @llvm.log.f16(half %a) #0
declare half @llvm.log10.f16(half %a) #0
declare half @llvm.log2.f16(half %a) #0
declare half @llvm.fma.f16(half %a, half %b, half %c) #0
declare half @llvm.fabs.f16(half %a) #0
declare half @llvm.minnum.f16(half %a, half %b) #0
declare half @llvm.maxnum.f16(half %a, half %b) #0
declare half @llvm.copysign.f16(half %a, half %b) #0
declare half @llvm.floor.f16(half %a) #0
declare half @llvm.ceil.f16(half %a) #0
declare half @llvm.trunc.f16(half %a) #0
declare half @llvm.rint.f16(half %a) #0
declare half @llvm.nearbyint.f16(half %a) #0
declare half @llvm.round.f16(half %a) #0
declare half @llvm.roundeven.f16(half %a) #0
declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0

; Square root is expected to be computed in f32 and rounded back to f16.
; CHECK-LABEL: test_sqrt(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_sqrt_param_0];
; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOFTZ: sqrt.rn.f32 [[RF:%f[0-9]+]], [[AF]];
; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ: sqrt.rn.ftz.f32 [[RF:%f[0-9]+]], [[AF]];
; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_sqrt(half %a) #0 {
  %r = call half @llvm.sqrt.f16(half %a)
  ret half %r
}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_powi(
;define half @test_powi(half %a, i32 %b) #0 {
;  %r = call half @llvm.powi.f16.i32(half %a, i32 %b)
;  ret half %r
;}

;; sin and cos carry attribute group #1 in addition to #0 and are expected to
;; lower to the approximate f32 instruction, with the result rounded to f16.

; CHECK-LABEL: test_sin(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_sin_param_0];
; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK: sin.approx.f32 [[RF:%f[0-9]+]], [[AF]];
; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_sin(half %a) #0 #1 {
  %r = call half @llvm.sin.f16(half %a)
  ret half %r
}

; CHECK-LABEL: test_cos(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_cos_param_0];
; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK: cos.approx.f32 [[RF:%f[0-9]+]], [[AF]];
; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_cos(half %a) #0 #1 {
  %r = call half @llvm.cos.f16(half %a)
  ret half %r
}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_pow(
;define half @test_pow(half %a, half %b) #0 {
;  %r = call half @llvm.pow.f16(half %a, half %b)
;  ret half %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_exp(
;define half @test_exp(half %a) #0 {
;  %r = call half @llvm.exp.f16(half %a)
;  ret half %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_exp2(
;define half @test_exp2(half %a) #0 {
;  %r = call half @llvm.exp2.f16(half %a)
;  ret half %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_log(
;define half @test_log(half %a) #0 {
;  %r = call half @llvm.log.f16(half %a)
;  ret half %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_log10(
;define half @test_log10(half %a) #0 {
;  %r = call half @llvm.log10.f16(half %a)
;  ret half %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_log2(
;define half @test_log2(half %a) #0 {
;  %r = call half @llvm.log2.f16(half %a)
;  ret half %r
;}

;; fma: a single f16 fma when f16 math is available; otherwise the three
;; operands are widened to f32, fused there, and the result rounded to f16.
; CHECK-LABEL: test_fma(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fma_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fma_param_1];
; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_fma_param_2];
; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]];
; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_fma(half %a, half %b, half %c) #0 {
  %r = call half @llvm.fma.f16(half %a, half %b, half %c)
  ret half %r
}

;; fabs, minnum and maxnum are expected to be computed in f32 in every
;; configuration; only the .ftz modifiers differ.

; CHECK-LABEL: test_fabs(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fabs_param_0];
; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOFTZ: abs.f32 [[RF:%f[0-9]+]], [[AF]];
; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ: abs.ftz.f32 [[RF:%f[0-9]+]], [[AF]];
; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_fabs(half %a) #0 {
  %r = call half @llvm.fabs.f16(half %a)
  ret half %r
}

; CHECK-LABEL: test_minnum(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_minnum_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_minnum_param_1];
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOFTZ: min.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-F16-FTZ: min.ftz.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_minnum(half %a, half %b) #0 {
  %r = call half @llvm.minnum.f16(half %a, half %b)
  ret half %r
}

; CHECK-LABEL: test_maxnum(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_maxnum_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_maxnum_param_1];
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOFTZ: max.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-F16-FTZ: max.ftz.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_maxnum(half %a, half %b) #0 {
  %r = call half @llvm.maxnum.f16(half %a, half %b)
  ret half %r
}

;; copysign is expected to be done with integer bit operations: 32767 (0x7FFF)
;; keeps the magnitude bits of the first operand, and the sign bit of the
;; second operand is OR-ed in.

; CHECK-LABEL: test_copysign(
; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_param_0];
; CHECK-DAG: ld.param.b16 [[BH:%rs[0-9]+]], [test_copysign_param_1];
; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AH]], 32767;
; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BH]], -32768;
; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]];
; CHECK: st.param.b16 [func_retval0], [[RX]];
; CHECK: ret;
define half @test_copysign(half %a, half %b) #0 {
  %r = call half @llvm.copysign.f16(half %a, half %b)
  ret half %r
}

;; The sign source is an f32: its sign bit is masked in 32 bits and the upper
;; 16-bit half is unpacked, with no fptrunc conversion in the expected output.
; CHECK-LABEL: test_copysign_f32(
; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_f32_param_0];
; CHECK-DAG: ld.param.f32 [[BF:%f[0-9]+]], [test_copysign_f32_param_1];
; CHECK-DAG: mov.b32 [[B:%r[0-9]+]], [[BF]];
; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AH]], 32767;
; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[B]], -2147483648;
; CHECK-DAG: mov.b32 {tmp, [[BX2:%rs[0-9]+]]}, [[BX0]];
; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
; CHECK: st.param.b16 [func_retval0], [[RX]];
; CHECK: ret;
define half @test_copysign_f32(half %a, float %b) #0 {
  %tb = fptrunc float %b to half
  %r = call half @llvm.copysign.f16(half %a, half %tb)
  ret half %r
}

;; The sign source is an f64: its sign bit is masked in 64 bits, shifted right
;; by 48 and narrowed to 16 bits.
; CHECK-LABEL: test_copysign_f64(
; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_f64_param_0];
; CHECK-DAG: ld.param.f64 [[BD:%fd[0-9]+]], [test_copysign_f64_param_1];
; CHECK-DAG: mov.b64 [[B:%rd[0-9]+]], [[BD]];
; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AH]], 32767;
; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[B]], -9223372036854775808;
; CHECK-DAG: shr.u64 [[BX1:%rd[0-9]+]], [[BX0]], 48;
; CHECK-DAG: cvt.u16.u64 [[BX2:%rs[0-9]+]], [[BX1]];
; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
; CHECK: st.param.b16 [func_retval0], [[RX]];
; CHECK: ret;
define half @test_copysign_f64(half %a, double %b) #0 {
  %tb = fptrunc double %b to half
  %r = call half @llvm.copysign.f16(half %a, half %tb)
  ret half %r
}

;; The copysign result is extended to f32 after the 16-bit bit operations.
; CHECK-LABEL: test_copysign_extended(
; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_extended_param_0];
; CHECK-DAG: ld.param.b16 [[BH:%rs[0-9]+]], [test_copysign_extended_param_1];
; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AH]], 32767;
; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BH]], -32768;
; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]];
; CHECK-NOFTZ: cvt.f32.f16 [[XR:%f[0-9]+]], [[RX]];
; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[XR:%f[0-9]+]], [[RX]];
; CHECK: st.param.f32 [func_retval0], [[XR]];
; CHECK: ret;
define float @test_copysign_extended(half %a, half %b) #0 {
  %r = call half @llvm.copysign.f16(half %a, half %b)
  %xr = fpext half %r to float
  ret float %xr
}

;; Rounding intrinsics: each is expected to become one f16-to-f16 cvt whose
;; rounding modifier selects the mode (rmi, rpi, rzi, rni).

; CHECK-LABEL: test_floor(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_floor_param_0];
; CHECK: cvt.rmi.f16.f16 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_floor(half %a) #0 {
  %r = call half @llvm.floor.f16(half %a)
  ret half %r
}

; CHECK-LABEL: test_ceil(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_ceil_param_0];
; CHECK: cvt.rpi.f16.f16 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_ceil(half %a) #0 {
  %r = call half @llvm.ceil.f16(half %a)
  ret half %r
}

; CHECK-LABEL: test_trunc(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_trunc_param_0];
; CHECK: cvt.rzi.f16.f16 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_trunc(half %a) #0 {
  %r = call half @llvm.trunc.f16(half %a)
  ret half %r
}

; CHECK-LABEL: test_rint(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_rint_param_0];
; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_rint(half %a) #0 {
  %r = call half @llvm.rint.f16(half %a)
  ret half %r
}

; CHECK-LABEL: test_nearbyint(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_nearbyint_param_0];
; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_nearbyint(half %a) #0 {
  %r = call half @llvm.nearbyint.f16(half %a)
  ret half %r
}

; CHECK-LABEL: test_roundeven(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_roundeven_param_0];
; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_roundeven(half %a) #0 {
  %r = call half @llvm.roundeven.f16(half %a)
  ret half %r
}

; CHECK-LABEL: test_round(
; CHECK: ld.param.b16 {{.*}}, [test_round_param_0];
; check the use of sign mask and 0.5 to implement round
; (-2147483648 is the f32 sign bit; 1056964608 is 0x3F000000, i.e. 0.5f)
; CHECK: and.b32 [[R:%r[0-9]+]], {{.*}}, -2147483648;
; CHECK: or.b32 {{.*}}, [[R]], 1056964608;
; CHECK: st.param.b16 [func_retval0], {{.*}};
; CHECK: ret;
define half @test_round(half %a) #0 {
  %r = call half @llvm.round.f16(half %a)
  ret half %r
}

;; fmuladd is expected to lower exactly like fma in every configuration.
; CHECK-LABEL: test_fmuladd(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fmuladd_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fmuladd_param_1];
; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_fmuladd_param_2];
; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]];
; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
define half @test_fmuladd(half %a, half %b, half %c) #0 {
  %r = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
  ret half %r
}

;; fneg: a native neg with f16 math; otherwise a 16-bit xor that flips the
;; sign bit (once per element for the <2 x half> case).

; CHECK-LABEL: test_neg_f16(
; CHECK-F16-NOFTZ: neg.f16
; CHECK-F16-FTZ: neg.ftz.f16
; CHECK-NOF16: xor.b16 %rs{{.*}}, %rs{{.*}}, -32768
define half @test_neg_f16(half noundef %arg) #0 {
  %res = fneg half %arg
  ret half %res
}

; CHECK-LABEL: test_neg_f16x2(
; CHECK-F16-NOFTZ: neg.f16x2
; CHECK-F16-FTZ: neg.ftz.f16x2
; CHECK-NOF16: xor.b16 %rs{{.*}}, %rs{{.*}}, -32768
; CHECK-NOF16: xor.b16 %rs{{.*}}, %rs{{.*}}, -32768
define <2 x half> @test_neg_f16x2(<2 x half> noundef %arg) #0 {
  %res = fneg <2 x half> %arg
  ret <2 x half> %res
}

attributes #0 = { nounwind }
attributes #1 = { "unsafe-fp-math" = "true" }