1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 2; ## Full FP16 support enabled by default. 3; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 \ 4; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \ 5; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-F16 %s 6; RUN: %if ptxas %{ \ 7; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 \ 8; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \ 9; RUN: | %ptxas-verify -arch=sm_53 \ 10; RUN: %} 11; ## FP16 support explicitly disabled. 12; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 \ 13; RUN: -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \ 14; RUN: -verify-machineinstrs \ 15; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s 16; RUN: %if ptxas %{ \ 17; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 \ 18; RUN: -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \ 19; RUN: -verify-machineinstrs \ 20; RUN: | %ptxas-verify -arch=sm_53 \ 21; RUN: %} 22; ## FP16 is not supported by hardware. 23; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 \ 24; RUN: -disable-post-ra -frame-pointer=all -verify-machineinstrs \ 25; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s 26; RUN: %if ptxas %{ \ 27; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 \ 28; RUN: -disable-post-ra -frame-pointer=all -verify-machineinstrs \ 29; RUN: | %ptxas-verify -arch=sm_52 \ 30; RUN: %} 31 32target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" 33 34define <2 x half> @test_ret_const() #0 { 35; CHECK-LABEL: test_ret_const( 36; CHECK: { 37; CHECK-NEXT: .reg .b32 %r<2>; 38; CHECK-EMPTY: 39; CHECK-NEXT: // %bb.0: 40; CHECK-NEXT: mov.b32 %r1, 1073757184; 41; CHECK-NEXT: st.param.b32 [func_retval0], %r1; 42; CHECK-NEXT: ret; 43 ret <2 x half> <half 1.0, half 2.0> 44} 45 46define half @test_extract_0(<2 x half> %a) #0 { 47; CHECK-LABEL: test_extract_0( 48; CHECK: { 49; CHECK-NEXT: .reg .b16 %rs<2>; 50; CHECK-NEXT: .reg .b32 %r<2>; 51; CHECK-EMPTY: 52; CHECK-NEXT: // %bb.0: 53; CHECK-NEXT: ld.param.b32 %r1, [test_extract_0_param_0]; 54; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {%rs1, tmp}, %r1; } 55; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; 56; CHECK-NEXT: ret; 57 %e = extractelement <2 x half> %a, i32 0 58 ret half %e 59} 60 61define half @test_extract_1(<2 x half> %a) #0 { 62; CHECK-LABEL: test_extract_1( 63; CHECK: { 64; CHECK-NEXT: .reg .b16 %rs<2>; 65; CHECK-NEXT: .reg .b32 %r<2>; 66; CHECK-EMPTY: 67; CHECK-NEXT: // %bb.0: 68; CHECK-NEXT: ld.param.b32 %r1, [test_extract_1_param_0]; 69; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r1; } 70; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; 71; CHECK-NEXT: ret; 72 %e = extractelement <2 x half> %a, i32 1 73 ret half %e 74} 75 76define half @test_extract_i(<2 x half> %a, i64 %idx) #0 { 77; CHECK-LABEL: test_extract_i( 78; CHECK: { 79; CHECK-NEXT: .reg .pred %p<2>; 80; CHECK-NEXT: .reg .b16 %rs<4>; 81; CHECK-NEXT: .reg .b32 %r<2>; 82; CHECK-NEXT: .reg .b64 %rd<2>; 83; CHECK-EMPTY: 84; CHECK-NEXT: // %bb.0: 85; CHECK-NEXT: ld.param.u64 %rd1, [test_extract_i_param_1]; 86; CHECK-NEXT: ld.param.b32 %r1, [test_extract_i_param_0]; 87; CHECK-NEXT: setp.eq.s64 %p1, %rd1, 0; 88; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 89; CHECK-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; 90; CHECK-NEXT: st.param.b16 [func_retval0], %rs3; 91; CHECK-NEXT: ret; 92 %e = extractelement <2 x half> %a, i64 %idx 93 ret half %e 94} 95 96define <2 x half> @test_fadd(<2 x half> %a, <2 x half> %b) #0 { 97; CHECK-F16-LABEL: test_fadd( 98; CHECK-F16: { 99; CHECK-F16-NEXT: .reg .b32 %r<4>; 100; CHECK-F16-EMPTY: 101; CHECK-F16-NEXT: // %bb.0: 102; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fadd_param_1]; 103; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fadd_param_0]; 104; CHECK-F16-NEXT: add.rn.f16x2 %r3, %r1, %r2; 105; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; 106; CHECK-F16-NEXT: ret; 107; 108; CHECK-NOF16-LABEL: test_fadd( 109; CHECK-NOF16: { 110; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 111; CHECK-NOF16-NEXT: .reg .b32 %r<4>; 112; CHECK-NOF16-NEXT: .reg .f32 %f<7>; 113; CHECK-NOF16-EMPTY: 114; CHECK-NOF16-NEXT: // %bb.0: 115; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fadd_param_1]; 116; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fadd_param_0]; 117; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 118; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 119; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 120; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 121; CHECK-NOF16-NEXT: add.rn.f32 %f3, %f2, %f1; 122; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %f3; 123; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; 124; CHECK-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; 125; CHECK-NOF16-NEXT: add.rn.f32 %f6, %f5, %f4; 126; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %f6; 127; CHECK-NOF16-NEXT: mov.b32 %r3, {%rs6, %rs5}; 128; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r3; 129; CHECK-NOF16-NEXT: ret; 130 %r = fadd <2 x half> %a, %b 131 ret <2 x half> %r 132} 133 134; Check that we can lower fadd with immediate arguments. 135define <2 x half> @test_fadd_imm_0(<2 x half> %a) #0 { 136; CHECK-F16-LABEL: test_fadd_imm_0( 137; CHECK-F16: { 138; CHECK-F16-NEXT: .reg .b32 %r<4>; 139; CHECK-F16-EMPTY: 140; CHECK-F16-NEXT: // %bb.0: 141; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fadd_imm_0_param_0]; 142; CHECK-F16-NEXT: mov.b32 %r2, 1073757184; 143; CHECK-F16-NEXT: add.rn.f16x2 %r3, %r1, %r2; 144; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; 145; CHECK-F16-NEXT: ret; 146; 147; CHECK-NOF16-LABEL: test_fadd_imm_0( 148; CHECK-NOF16: { 149; CHECK-NOF16-NEXT: .reg .b16 %rs<5>; 150; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 151; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 152; CHECK-NOF16-EMPTY: 153; CHECK-NOF16-NEXT: // %bb.0: 154; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fadd_imm_0_param_0]; 155; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; 156; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 157; CHECK-NOF16-NEXT: add.rn.f32 %f2, %f1, 0f40000000; 158; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %f2; 159; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs1; 160; CHECK-NOF16-NEXT: add.rn.f32 %f4, %f3, 0f3F800000; 161; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs4, %f4; 162; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs4, %rs3}; 163; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r2; 164; CHECK-NOF16-NEXT: ret; 165 %r = fadd <2 x half> <half 1.0, half 2.0>, %a 166 ret <2 x half> %r 167} 168 169define <2 x half> @test_fadd_imm_1(<2 x half> %a) #0 { 170; CHECK-F16-LABEL: test_fadd_imm_1( 171; CHECK-F16: { 172; CHECK-F16-NEXT: .reg .b32 %r<4>; 173; CHECK-F16-EMPTY: 174; CHECK-F16-NEXT: // %bb.0: 175; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fadd_imm_1_param_0]; 176; CHECK-F16-NEXT: mov.b32 %r2, 1073757184; 177; CHECK-F16-NEXT: add.rn.f16x2 %r3, %r1, %r2; 178; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; 179; CHECK-F16-NEXT: ret; 180; 181; CHECK-NOF16-LABEL: test_fadd_imm_1( 182; CHECK-NOF16: { 183; CHECK-NOF16-NEXT: .reg .b16 %rs<5>; 184; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 185; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 186; CHECK-NOF16-EMPTY: 187; CHECK-NOF16-NEXT: // %bb.0: 188; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fadd_imm_1_param_0]; 189; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; 190; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 191; CHECK-NOF16-NEXT: add.rn.f32 %f2, %f1, 0f40000000; 192; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %f2; 193; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs1; 194; CHECK-NOF16-NEXT: add.rn.f32 %f4, %f3, 0f3F800000; 195; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs4, %f4; 196; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs4, %rs3}; 197; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r2; 198; CHECK-NOF16-NEXT: ret; 199 %r = fadd <2 x half> %a, <half 1.0, half 2.0> 200 ret <2 x half> %r 201} 202 203define <2 x half> @test_fsub(<2 x half> %a, <2 x half> %b) #0 { 204; CHECK-F16-LABEL: test_fsub( 205; CHECK-F16: { 206; CHECK-F16-NEXT: .reg .b32 %r<4>; 207; CHECK-F16-EMPTY: 208; CHECK-F16-NEXT: // %bb.0: 209; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fsub_param_1]; 210; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fsub_param_0]; 211; CHECK-F16-NEXT: sub.rn.f16x2 %r3, %r1, %r2; 212; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; 213; CHECK-F16-NEXT: ret; 214; 215; CHECK-NOF16-LABEL: test_fsub( 216; CHECK-NOF16: { 217; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 218; CHECK-NOF16-NEXT: .reg .b32 %r<4>; 219; CHECK-NOF16-NEXT: .reg .f32 %f<7>; 220; CHECK-NOF16-EMPTY: 221; CHECK-NOF16-NEXT: // %bb.0: 222; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fsub_param_1]; 223; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fsub_param_0]; 224; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 225; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 226; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 227; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 228; CHECK-NOF16-NEXT: sub.rn.f32 %f3, %f2, %f1; 229; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %f3; 230; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; 231; CHECK-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; 232; CHECK-NOF16-NEXT: sub.rn.f32 %f6, %f5, %f4; 233; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %f6; 234; CHECK-NOF16-NEXT: mov.b32 %r3, {%rs6, %rs5}; 235; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r3; 236; CHECK-NOF16-NEXT: ret; 237 %r = fsub <2 x half> %a, %b 238 ret <2 x half> %r 239} 240 241define <2 x half> @test_fneg(<2 x half> %a) #0 { 242; CHECK-F16-LABEL: test_fneg( 243; CHECK-F16: { 244; CHECK-F16-NEXT: .reg .b32 %r<4>; 245; CHECK-F16-EMPTY: 246; CHECK-F16-NEXT: // %bb.0: 247; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fneg_param_0]; 248; CHECK-F16-NEXT: mov.b32 %r2, 0; 249; CHECK-F16-NEXT: sub.rn.f16x2 %r3, %r2, %r1; 250; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; 251; CHECK-F16-NEXT: ret; 252; 253; CHECK-NOF16-LABEL: test_fneg( 254; CHECK-NOF16: { 255; CHECK-NOF16-NEXT: .reg .b16 %rs<5>; 256; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 257; CHECK-NOF16-NEXT: .reg .f32 %f<6>; 258; CHECK-NOF16-EMPTY: 259; CHECK-NOF16-NEXT: // %bb.0: 260; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fneg_param_0]; 261; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; 262; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 263; CHECK-NOF16-NEXT: mov.f32 %f2, 0f00000000; 264; CHECK-NOF16-NEXT: sub.rn.f32 %f3, %f2, %f1; 265; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %f3; 266; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; 267; CHECK-NOF16-NEXT: sub.rn.f32 %f5, %f2, %f4; 268; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs4, %f5; 269; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs4, %rs3}; 270; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r2; 271; CHECK-NOF16-NEXT: ret; 272 %r = fsub <2 x half> <half 0.0, half 0.0>, %a 273 ret <2 x half> %r 274} 275 276define <2 x half> @test_fmul(<2 x half> %a, <2 x half> %b) #0 { 277; CHECK-F16-LABEL: test_fmul( 278; CHECK-F16: { 279; CHECK-F16-NEXT: .reg .b32 %r<4>; 280; CHECK-F16-EMPTY: 281; CHECK-F16-NEXT: // %bb.0: 282; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fmul_param_1]; 283; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fmul_param_0]; 284; CHECK-F16-NEXT: mul.rn.f16x2 %r3, %r1, %r2; 285; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; 286; CHECK-F16-NEXT: ret; 287; 288; CHECK-NOF16-LABEL: test_fmul( 289; CHECK-NOF16: { 290; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 291; CHECK-NOF16-NEXT: .reg .b32 %r<4>; 292; CHECK-NOF16-NEXT: .reg .f32 %f<7>; 293; CHECK-NOF16-EMPTY: 294; CHECK-NOF16-NEXT: // %bb.0: 295; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fmul_param_1]; 296; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fmul_param_0]; 297; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 298; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 299; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 300; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 301; CHECK-NOF16-NEXT: mul.rn.f32 %f3, %f2, %f1; 302; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %f3; 303; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; 304; CHECK-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; 305; CHECK-NOF16-NEXT: mul.rn.f32 %f6, %f5, %f4; 306; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %f6; 307; CHECK-NOF16-NEXT: mov.b32 %r3, {%rs6, %rs5}; 308; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r3; 309; CHECK-NOF16-NEXT: ret; 310 %r = fmul <2 x half> %a, %b 311 ret <2 x half> %r 312} 313 314define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 { 315; CHECK-LABEL: test_fdiv( 316; CHECK: { 317; CHECK-NEXT: .reg .b16 %rs<7>; 318; CHECK-NEXT: .reg .b32 %r<4>; 319; CHECK-NEXT: .reg .f32 %f<7>; 320; CHECK-EMPTY: 321; CHECK-NEXT: // %bb.0: 322; CHECK-NEXT: ld.param.b32 %r2, [test_fdiv_param_1]; 323; CHECK-NEXT: ld.param.b32 %r1, [test_fdiv_param_0]; 324; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; 325; CHECK-NEXT: cvt.f32.f16 %f1, %rs2; 326; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1; 327; CHECK-NEXT: cvt.f32.f16 %f2, %rs4; 328; CHECK-NEXT: div.rn.f32 %f3, %f2, %f1; 329; CHECK-NEXT: cvt.rn.f16.f32 %rs5, %f3; 330; CHECK-NEXT: cvt.f32.f16 %f4, %rs1; 331; CHECK-NEXT: cvt.f32.f16 %f5, %rs3; 332; CHECK-NEXT: div.rn.f32 %f6, %f5, %f4; 333; CHECK-NEXT: cvt.rn.f16.f32 %rs6, %f6; 334; CHECK-NEXT: mov.b32 %r3, {%rs6, %rs5}; 335; CHECK-NEXT: st.param.b32 [func_retval0], %r3; 336; CHECK-NEXT: ret; 337 %r = fdiv <2 x half> %a, %b 338 ret <2 x half> %r 339} 340 341; -- Load two 16x2 inputs and split them into f16 elements 342; -- Split into elements 343; -- promote to f32. 344; -- frem(a[0],b[0]). 345; -- frem(a[1],b[1]). 346; -- convert back to f16. 347; -- merge into f16x2 and return it. 348define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 { 349; CHECK-LABEL: test_frem( 350; CHECK: { 351; CHECK-NEXT: .reg .pred %p<3>; 352; CHECK-NEXT: .reg .b16 %rs<7>; 353; CHECK-NEXT: .reg .b32 %r<4>; 354; CHECK-NEXT: .reg .f32 %f<15>; 355; CHECK-EMPTY: 356; CHECK-NEXT: // %bb.0: 357; CHECK-NEXT: ld.param.b32 %r2, [test_frem_param_1]; 358; CHECK-NEXT: ld.param.b32 %r1, [test_frem_param_0]; 359; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; 360; CHECK-NEXT: cvt.f32.f16 %f1, %rs2; 361; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1; 362; CHECK-NEXT: cvt.f32.f16 %f2, %rs4; 363; CHECK-NEXT: div.rn.f32 %f3, %f2, %f1; 364; CHECK-NEXT: cvt.rzi.f32.f32 %f4, %f3; 365; CHECK-NEXT: mul.f32 %f5, %f4, %f1; 366; CHECK-NEXT: sub.f32 %f6, %f2, %f5; 367; CHECK-NEXT: testp.infinite.f32 %p1, %f1; 368; CHECK-NEXT: selp.f32 %f7, %f2, %f6, %p1; 369; CHECK-NEXT: cvt.rn.f16.f32 %rs5, %f7; 370; CHECK-NEXT: cvt.f32.f16 %f8, %rs1; 371; CHECK-NEXT: cvt.f32.f16 %f9, %rs3; 372; CHECK-NEXT: div.rn.f32 %f10, %f9, %f8; 373; CHECK-NEXT: cvt.rzi.f32.f32 %f11, %f10; 374; CHECK-NEXT: mul.f32 %f12, %f11, %f8; 375; CHECK-NEXT: sub.f32 %f13, %f9, %f12; 376; CHECK-NEXT: testp.infinite.f32 %p2, %f8; 377; CHECK-NEXT: selp.f32 %f14, %f9, %f13, %p2; 378; CHECK-NEXT: cvt.rn.f16.f32 %rs6, %f14; 379; CHECK-NEXT: mov.b32 %r3, {%rs6, %rs5}; 380; CHECK-NEXT: st.param.b32 [func_retval0], %r3; 381; CHECK-NEXT: ret; 382 %r = frem <2 x half> %a, %b 383 ret <2 x half> %r 384} 385 386define void @test_ldst_v2f16(ptr %a, ptr %b) { 387; CHECK-LABEL: test_ldst_v2f16( 388; CHECK: { 389; CHECK-NEXT: .reg .b32 %r<2>; 390; CHECK-NEXT: .reg .b64 %rd<3>; 391; CHECK-EMPTY: 392; CHECK-NEXT: // %bb.0: 393; CHECK-NEXT: ld.param.u64 %rd2, [test_ldst_v2f16_param_1]; 394; CHECK-NEXT: ld.param.u64 %rd1, [test_ldst_v2f16_param_0]; 395; CHECK-NEXT: ld.b32 %r1, [%rd1]; 396; CHECK-NEXT: st.b32 [%rd2], %r1; 397; CHECK-NEXT: ret; 398 %t1 = load <2 x half>, ptr %a 399 store <2 x half> %t1, ptr %b, align 16 400 ret void 401} 402 403; -- v3 is inconvenient to capture as it's lowered as ld.b64 + fair 404; number of bitshifting instructions that may change at llvm's whim. 405; So we only verify that we only issue correct number of writes using 406; correct offset, but not the values we write. 407define void @test_ldst_v3f16(ptr %a, ptr %b) { 408; CHECK-LABEL: test_ldst_v3f16( 409; CHECK: { 410; CHECK-NEXT: .reg .b16 %rs<2>; 411; CHECK-NEXT: .reg .b32 %r<2>; 412; CHECK-NEXT: .reg .b64 %rd<4>; 413; CHECK-EMPTY: 414; CHECK-NEXT: // %bb.0: 415; CHECK-NEXT: ld.param.u64 %rd2, [test_ldst_v3f16_param_1]; 416; CHECK-NEXT: ld.param.u64 %rd1, [test_ldst_v3f16_param_0]; 417; CHECK-NEXT: ld.u64 %rd3, [%rd1]; 418; CHECK-NEXT: { .reg .b32 tmp; mov.b64 {tmp, %r1}, %rd3; } 419; CHECK-NEXT: st.u32 [%rd2], %rd3; 420; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {%rs1, tmp}, %r1; } 421; CHECK-NEXT: st.b16 [%rd2+4], %rs1; 422; CHECK-NEXT: ret; 423 %t1 = load <3 x half>, ptr %a 424 store <3 x half> %t1, ptr %b, align 16 425 ret void 426} 427 428define void @test_ldst_v4f16(ptr %a, ptr %b) { 429; CHECK-LABEL: test_ldst_v4f16( 430; CHECK: { 431; CHECK-NEXT: .reg .b16 %rs<5>; 432; CHECK-NEXT: .reg .b64 %rd<3>; 433; CHECK-EMPTY: 434; CHECK-NEXT: // %bb.0: 435; CHECK-NEXT: ld.param.u64 %rd2, [test_ldst_v4f16_param_1]; 436; CHECK-NEXT: ld.param.u64 %rd1, [test_ldst_v4f16_param_0]; 437; CHECK-NEXT: ld.v4.b16 {%rs1, %rs2, %rs3, %rs4}, [%rd1]; 438; CHECK-NEXT: st.v4.b16 [%rd2], {%rs1, %rs2, %rs3, %rs4}; 439; CHECK-NEXT: ret; 440 %t1 = load <4 x half>, ptr %a 441 store <4 x half> %t1, ptr %b, align 16 442 ret void 443} 444 445define void @test_ldst_v8f16(ptr %a, ptr %b) { 446; CHECK-LABEL: test_ldst_v8f16( 447; CHECK: { 448; CHECK-NEXT: .reg .b32 %r<5>; 449; CHECK-NEXT: .reg .b64 %rd<3>; 450; CHECK-EMPTY: 451; CHECK-NEXT: // %bb.0: 452; CHECK-NEXT: ld.param.u64 %rd2, [test_ldst_v8f16_param_1]; 453; CHECK-NEXT: ld.param.u64 %rd1, [test_ldst_v8f16_param_0]; 454; CHECK-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; 455; CHECK-NEXT: st.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; 456; CHECK-NEXT: ret; 457 %t1 = load <8 x half>, ptr %a 458 store <8 x half> %t1, ptr %b, align 16 459 ret void 460} 461 462declare <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) #0 463 464define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 { 465; CHECK-LABEL: test_call( 466; CHECK: { 467; CHECK-NEXT: .reg .b32 %r<5>; 468; CHECK-EMPTY: 469; CHECK-NEXT: // %bb.0: 470; CHECK-NEXT: ld.param.b32 %r2, [test_call_param_1]; 471; CHECK-NEXT: ld.param.b32 %r1, [test_call_param_0]; 472; CHECK-NEXT: { // callseq 0, 0 473; CHECK-NEXT: .param .align 4 .b8 param0[4]; 474; CHECK-NEXT: st.param.b32 [param0], %r1; 475; CHECK-NEXT: .param .align 4 .b8 param1[4]; 476; CHECK-NEXT: st.param.b32 [param1], %r2; 477; CHECK-NEXT: .param .align 4 .b8 retval0[4]; 478; CHECK-NEXT: call.uni (retval0), 479; CHECK-NEXT: test_callee, 480; CHECK-NEXT: ( 481; CHECK-NEXT: param0, 482; CHECK-NEXT: param1 483; CHECK-NEXT: ); 484; CHECK-NEXT: ld.param.b32 %r3, [retval0]; 485; CHECK-NEXT: } // callseq 0 486; CHECK-NEXT: st.param.b32 [func_retval0], %r3; 487; CHECK-NEXT: ret; 488 %r = call <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) 489 ret <2 x half> %r 490} 491 492define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 { 493; CHECK-LABEL: test_call_flipped( 494; CHECK: { 495; CHECK-NEXT: .reg .b32 %r<5>; 496; CHECK-EMPTY: 497; CHECK-NEXT: // %bb.0: 498; CHECK-NEXT: ld.param.b32 %r2, [test_call_flipped_param_1]; 499; CHECK-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0]; 500; CHECK-NEXT: { // callseq 1, 0 501; CHECK-NEXT: .param .align 4 .b8 param0[4]; 502; CHECK-NEXT: st.param.b32 [param0], %r2; 503; CHECK-NEXT: .param .align 4 .b8 param1[4]; 504; CHECK-NEXT: st.param.b32 [param1], %r1; 505; CHECK-NEXT: .param .align 4 .b8 retval0[4]; 506; CHECK-NEXT: call.uni (retval0), 507; CHECK-NEXT: test_callee, 508; CHECK-NEXT: ( 509; CHECK-NEXT: param0, 510; CHECK-NEXT: param1 511; CHECK-NEXT: ); 512; CHECK-NEXT: ld.param.b32 %r3, [retval0]; 513; CHECK-NEXT: } // callseq 1 514; CHECK-NEXT: st.param.b32 [func_retval0], %r3; 515; CHECK-NEXT: ret; 516 %r = call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a) 517 ret <2 x half> %r 518} 519 520define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 { 521; CHECK-LABEL: test_tailcall_flipped( 522; CHECK: { 523; CHECK-NEXT: .reg .b32 %r<5>; 524; CHECK-EMPTY: 525; CHECK-NEXT: // %bb.0: 526; CHECK-NEXT: ld.param.b32 %r2, [test_tailcall_flipped_param_1]; 527; CHECK-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0]; 528; CHECK-NEXT: { // callseq 2, 0 529; CHECK-NEXT: .param .align 4 .b8 param0[4]; 530; CHECK-NEXT: st.param.b32 [param0], %r2; 531; CHECK-NEXT: .param .align 4 .b8 param1[4]; 532; CHECK-NEXT: st.param.b32 [param1], %r1; 533; CHECK-NEXT: .param .align 4 .b8 retval0[4]; 534; CHECK-NEXT: call.uni (retval0), 535; CHECK-NEXT: test_callee, 536; CHECK-NEXT: ( 537; CHECK-NEXT: param0, 538; CHECK-NEXT: param1 539; CHECK-NEXT: ); 540; CHECK-NEXT: ld.param.b32 %r3, [retval0]; 541; CHECK-NEXT: } // callseq 2 542; CHECK-NEXT: st.param.b32 [func_retval0], %r3; 543; CHECK-NEXT: ret; 544 %r = tail call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a) 545 ret <2 x half> %r 546} 547 548define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 { 549; CHECK-LABEL: test_select( 550; CHECK: { 551; CHECK-NEXT: .reg .pred %p<2>; 552; CHECK-NEXT: .reg .b16 %rs<3>; 553; CHECK-NEXT: .reg .b32 %r<4>; 554; CHECK-EMPTY: 555; CHECK-NEXT: // %bb.0: 556; CHECK-NEXT: ld.param.u8 %rs1, [test_select_param_2]; 557; CHECK-NEXT: and.b16 %rs2, %rs1, 1; 558; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1; 559; CHECK-NEXT: ld.param.b32 %r2, [test_select_param_1]; 560; CHECK-NEXT: ld.param.b32 %r1, [test_select_param_0]; 561; CHECK-NEXT: selp.b32 %r3, %r1, %r2, %p1; 562; CHECK-NEXT: st.param.b32 [func_retval0], %r3; 563; CHECK-NEXT: ret; 564 %r = select i1 %c, <2 x half> %a, <2 x half> %b 565 ret <2 x half> %r 566} 567 568define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) #0 { 569; CHECK-F16-LABEL: test_select_cc( 570; CHECK-F16: { 571; CHECK-F16-NEXT: .reg .pred %p<3>; 572; CHECK-F16-NEXT: .reg .b16 %rs<7>; 573; CHECK-F16-NEXT: .reg .b32 %r<6>; 574; CHECK-F16-EMPTY: 575; CHECK-F16-NEXT: // %bb.0: 576; CHECK-F16-NEXT: ld.param.b32 %r4, [test_select_cc_param_3]; 577; CHECK-F16-NEXT: ld.param.b32 %r3, [test_select_cc_param_2]; 578; CHECK-F16-NEXT: ld.param.b32 %r2, [test_select_cc_param_1]; 579; CHECK-F16-NEXT: ld.param.b32 %r1, [test_select_cc_param_0]; 580; CHECK-F16-NEXT: setp.neu.f16x2 %p1|%p2, %r3, %r4; 581; CHECK-F16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 582; CHECK-F16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 583; CHECK-F16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p2; 584; CHECK-F16-NEXT: selp.b16 %rs6, %rs3, %rs1, %p1; 585; CHECK-F16-NEXT: mov.b32 %r5, {%rs6, %rs5}; 586; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r5; 587; CHECK-F16-NEXT: ret; 588; 589; CHECK-NOF16-LABEL: test_select_cc( 590; CHECK-NOF16: { 591; CHECK-NOF16-NEXT: .reg .pred %p<3>; 592; CHECK-NOF16-NEXT: .reg .b16 %rs<11>; 593; CHECK-NOF16-NEXT: .reg .b32 %r<6>; 594; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 595; CHECK-NOF16-EMPTY: 596; CHECK-NOF16-NEXT: // %bb.0: 597; CHECK-NOF16-NEXT: ld.param.b32 %r4, [test_select_cc_param_3]; 598; CHECK-NOF16-NEXT: ld.param.b32 %r3, [test_select_cc_param_2]; 599; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_select_cc_param_1]; 600; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_select_cc_param_0]; 601; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r4; 602; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs1; 603; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r3; 604; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs3; 605; CHECK-NOF16-NEXT: setp.neu.f32 %p1, %f2, %f1; 606; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs2; 607; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs4; 608; CHECK-NOF16-NEXT: setp.neu.f32 %p2, %f4, %f3; 609; CHECK-NOF16-NEXT: mov.b32 {%rs5, %rs6}, %r2; 610; CHECK-NOF16-NEXT: mov.b32 {%rs7, %rs8}, %r1; 611; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs6, %p2; 612; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs7, %rs5, %p1; 613; CHECK-NOF16-NEXT: mov.b32 %r5, {%rs10, %rs9}; 614; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r5; 615; CHECK-NOF16-NEXT: ret; 616 %cc = fcmp une <2 x half> %c, %d 617 %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b 618 ret <2 x half> %r 619} 620 621define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b, 622; CHECK-F16-LABEL: test_select_cc_f32_f16( 623; CHECK-F16: { 624; CHECK-F16-NEXT: .reg .pred %p<3>; 625; CHECK-F16-NEXT: .reg .b32 %r<3>; 626; CHECK-F16-NEXT: .reg .f32 %f<7>; 627; CHECK-F16-EMPTY: 628; CHECK-F16-NEXT: // %bb.0: 629; CHECK-F16-NEXT: ld.param.v2.f32 {%f3, %f4}, [test_select_cc_f32_f16_param_1]; 630; CHECK-F16-NEXT: ld.param.v2.f32 {%f1, %f2}, [test_select_cc_f32_f16_param_0]; 631; CHECK-F16-NEXT: ld.param.b32 %r2, [test_select_cc_f32_f16_param_3]; 632; CHECK-F16-NEXT: ld.param.b32 %r1, [test_select_cc_f32_f16_param_2]; 633; CHECK-F16-NEXT: setp.neu.f16x2 %p1|%p2, %r1, %r2; 634; CHECK-F16-NEXT: selp.f32 %f5, %f2, %f4, %p2; 635; CHECK-F16-NEXT: selp.f32 %f6, %f1, %f3, %p1; 636; CHECK-F16-NEXT: st.param.v2.f32 [func_retval0], {%f6, %f5}; 637; CHECK-F16-NEXT: ret; 638; 639; CHECK-NOF16-LABEL: test_select_cc_f32_f16( 640; CHECK-NOF16: { 641; CHECK-NOF16-NEXT: .reg .pred %p<3>; 642; CHECK-NOF16-NEXT: .reg .b16 %rs<5>; 643; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 644; CHECK-NOF16-NEXT: .reg .f32 %f<11>; 645; CHECK-NOF16-EMPTY: 646; CHECK-NOF16-NEXT: // %bb.0: 647; CHECK-NOF16-NEXT: ld.param.v2.f32 {%f3, %f4}, [test_select_cc_f32_f16_param_1]; 648; CHECK-NOF16-NEXT: ld.param.v2.f32 {%f1, %f2}, [test_select_cc_f32_f16_param_0]; 649; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_select_cc_f32_f16_param_3]; 650; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_select_cc_f32_f16_param_2]; 651; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 652; CHECK-NOF16-NEXT: cvt.f32.f16 %f5, %rs1; 653; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 654; CHECK-NOF16-NEXT: cvt.f32.f16 %f6, %rs3; 655; CHECK-NOF16-NEXT: setp.neu.f32 %p1, %f6, %f5; 656; CHECK-NOF16-NEXT: cvt.f32.f16 %f7, %rs2; 657; CHECK-NOF16-NEXT: cvt.f32.f16 %f8, %rs4; 658; CHECK-NOF16-NEXT: setp.neu.f32 %p2, %f8, %f7; 659; CHECK-NOF16-NEXT: selp.f32 %f9, %f2, %f4, %p2; 660; CHECK-NOF16-NEXT: selp.f32 %f10, %f1, %f3, %p1; 661; CHECK-NOF16-NEXT: st.param.v2.f32 [func_retval0], {%f10, %f9}; 662; CHECK-NOF16-NEXT: ret; 663 <2 x half> %c, <2 x half> %d) #0 { 664 %cc = fcmp une <2 x half> %c, %d 665 %r = select <2 x i1> %cc, <2 x float> %a, <2 x float> %b 666 ret <2 x float> %r 667} 668 669define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b, 670; CHECK-LABEL: test_select_cc_f16_f32( 671; CHECK: { 672; CHECK-NEXT: .reg .pred %p<3>; 673; CHECK-NEXT: .reg .b16 %rs<7>; 674; CHECK-NEXT: .reg .b32 %r<4>; 675; CHECK-NEXT: .reg .f32 %f<5>; 676; CHECK-EMPTY: 677; CHECK-NEXT: // %bb.0: 678; CHECK-NEXT: ld.param.v2.f32 {%f3, %f4}, [test_select_cc_f16_f32_param_3]; 679; CHECK-NEXT: ld.param.v2.f32 {%f1, %f2}, [test_select_cc_f16_f32_param_2]; 680; CHECK-NEXT: ld.param.b32 %r2, [test_select_cc_f16_f32_param_1]; 681; CHECK-NEXT: ld.param.b32 %r1, [test_select_cc_f16_f32_param_0]; 682; CHECK-NEXT: setp.neu.f32 %p1, %f1, %f3; 683; CHECK-NEXT: setp.neu.f32 %p2, %f2, %f4; 684; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; 685; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1; 686; CHECK-NEXT: selp.b16 %rs5, %rs4, %rs2, %p2; 687; CHECK-NEXT: selp.b16 %rs6, %rs3, %rs1, %p1; 688; CHECK-NEXT: mov.b32 %r3, {%rs6, %rs5}; 689; CHECK-NEXT: st.param.b32 [func_retval0], %r3; 690; CHECK-NEXT: ret; 691 <2 x float> %c, <2 x float> %d) #0 { 692 %cc = fcmp une <2 x float> %c, %d 693 %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b 694 ret <2 x half> %r 695} 696 697define <2 x i1> @test_fcmp_une(<2 x half> %a, <2 x half> %b) #0 { 698; CHECK-F16-LABEL: test_fcmp_une( 699; CHECK-F16: { 700; CHECK-F16-NEXT: .reg .pred %p<3>; 701; CHECK-F16-NEXT: .reg .b16 %rs<3>; 702; CHECK-F16-NEXT: .reg .b32 %r<3>; 703; CHECK-F16-EMPTY: 704; CHECK-F16-NEXT: // %bb.0: 705; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fcmp_une_param_1]; 706; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fcmp_une_param_0]; 707; CHECK-F16-NEXT: setp.neu.f16x2 %p1|%p2, %r1, %r2; 708; CHECK-F16-NEXT: selp.u16 %rs1, -1, 0, %p1; 709; CHECK-F16-NEXT: st.param.b8 [func_retval0], %rs1; 710; CHECK-F16-NEXT: selp.u16 %rs2, -1, 0, %p2; 711; CHECK-F16-NEXT: st.param.b8 [func_retval0+1], %rs2; 712; CHECK-F16-NEXT: ret; 713; 714; CHECK-NOF16-LABEL: test_fcmp_une( 715; CHECK-NOF16: { 716; CHECK-NOF16-NEXT: .reg .pred %p<3>; 717; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 718; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 719; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 720; CHECK-NOF16-EMPTY: 721; CHECK-NOF16-NEXT: // %bb.0: 722; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_une_param_1]; 723; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_une_param_0]; 724; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 725; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 726; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 727; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 728; CHECK-NOF16-NEXT: setp.neu.f32 %p1, %f2, %f1; 729; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs1; 730; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs3; 731; CHECK-NOF16-NEXT: setp.neu.f32 %p2, %f4, %f3; 732; CHECK-NOF16-NEXT: selp.u16 %rs5, -1, 0, %p2; 733; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; 734; CHECK-NOF16-NEXT: selp.u16 %rs6, -1, 0, %p1; 735; CHECK-NOF16-NEXT: st.param.b8 [func_retval0+1], %rs6; 736; CHECK-NOF16-NEXT: ret; 737 %r = fcmp une <2 x half> %a, %b 738 ret <2 x i1> %r 739} 740 741define <2 x i1> @test_fcmp_ueq(<2 x half> %a, <2 x half> %b) #0 { 742; CHECK-F16-LABEL: test_fcmp_ueq( 743; CHECK-F16: { 744; CHECK-F16-NEXT: .reg .pred %p<3>; 745; CHECK-F16-NEXT: .reg .b16 %rs<3>; 746; CHECK-F16-NEXT: .reg .b32 %r<3>; 747; CHECK-F16-EMPTY: 748; CHECK-F16-NEXT: // %bb.0: 749; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fcmp_ueq_param_1]; 750; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fcmp_ueq_param_0]; 751; CHECK-F16-NEXT: setp.equ.f16x2 %p1|%p2, %r1, %r2; 752; CHECK-F16-NEXT: selp.u16 %rs1, -1, 0, %p1; 753; CHECK-F16-NEXT: st.param.b8 [func_retval0], %rs1; 754; CHECK-F16-NEXT: selp.u16 %rs2, -1, 0, %p2; 755; CHECK-F16-NEXT: st.param.b8 [func_retval0+1], %rs2; 756; CHECK-F16-NEXT: ret; 757; 758; CHECK-NOF16-LABEL: test_fcmp_ueq( 759; CHECK-NOF16: { 760; CHECK-NOF16-NEXT: .reg .pred %p<3>; 761; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 762; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 763; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 764; CHECK-NOF16-EMPTY: 765; CHECK-NOF16-NEXT: // %bb.0: 766; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ueq_param_1]; 767; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ueq_param_0]; 768; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 769; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 770; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 771; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 772; CHECK-NOF16-NEXT: setp.equ.f32 %p1, %f2, %f1; 773; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs1; 774; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs3; 775; CHECK-NOF16-NEXT: setp.equ.f32 %p2, %f4, %f3; 776; CHECK-NOF16-NEXT: selp.u16 %rs5, -1, 0, %p2; 777; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; 778; CHECK-NOF16-NEXT: selp.u16 %rs6, -1, 0, %p1; 779; CHECK-NOF16-NEXT: st.param.b8 [func_retval0+1], %rs6; 780; CHECK-NOF16-NEXT: ret; 781 %r = fcmp ueq <2 x half> %a, %b 782 ret <2 x i1> %r 783} 784 785define <2 x i1> @test_fcmp_ugt(<2 x half> %a, <2 x half> %b) #0 { 786; CHECK-F16-LABEL: test_fcmp_ugt( 787; CHECK-F16: { 788; CHECK-F16-NEXT: .reg .pred %p<3>; 789; CHECK-F16-NEXT: .reg .b16 %rs<3>; 790; CHECK-F16-NEXT: .reg .b32 %r<3>; 791; CHECK-F16-EMPTY: 792; CHECK-F16-NEXT: // %bb.0: 793; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fcmp_ugt_param_1]; 794; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fcmp_ugt_param_0]; 795; CHECK-F16-NEXT: setp.gtu.f16x2 %p1|%p2, %r1, %r2; 796; CHECK-F16-NEXT: selp.u16 %rs1, -1, 0, %p1; 797; CHECK-F16-NEXT: st.param.b8 [func_retval0], %rs1; 798; CHECK-F16-NEXT: selp.u16 %rs2, -1, 0, %p2; 799; CHECK-F16-NEXT: st.param.b8 [func_retval0+1], %rs2; 800; CHECK-F16-NEXT: ret; 801; 802; CHECK-NOF16-LABEL: test_fcmp_ugt( 803; CHECK-NOF16: { 804; CHECK-NOF16-NEXT: .reg .pred %p<3>; 805; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 806; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 807; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 808; CHECK-NOF16-EMPTY: 809; CHECK-NOF16-NEXT: // %bb.0: 810; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ugt_param_1]; 811; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ugt_param_0]; 812; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 813; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 814; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 815; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 816; CHECK-NOF16-NEXT: setp.gtu.f32 %p1, %f2, %f1; 817; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs1; 818; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs3; 819; CHECK-NOF16-NEXT: setp.gtu.f32 %p2, %f4, %f3; 820; CHECK-NOF16-NEXT: selp.u16 %rs5, -1, 0, %p2; 821; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; 822; CHECK-NOF16-NEXT: selp.u16 %rs6, -1, 0, %p1; 823; CHECK-NOF16-NEXT: st.param.b8 [func_retval0+1], %rs6; 824; CHECK-NOF16-NEXT: ret; 825 %r = fcmp ugt <2 x half> %a, %b 826 ret <2 x i1> %r 827} 828 829define <2 x i1> @test_fcmp_uge(<2 x half> %a, <2 x half> %b) #0 { 830; CHECK-F16-LABEL: test_fcmp_uge( 831; CHECK-F16: { 832; CHECK-F16-NEXT: .reg .pred %p<3>; 833; CHECK-F16-NEXT: .reg .b16 %rs<3>; 834; CHECK-F16-NEXT: .reg .b32 %r<3>; 835; CHECK-F16-EMPTY: 836; CHECK-F16-NEXT: // %bb.0: 837; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fcmp_uge_param_1]; 838; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fcmp_uge_param_0]; 839; CHECK-F16-NEXT: setp.geu.f16x2 %p1|%p2, %r1, %r2; 840; CHECK-F16-NEXT: selp.u16 %rs1, -1, 0, %p1; 841; CHECK-F16-NEXT: st.param.b8 [func_retval0], %rs1; 842; CHECK-F16-NEXT: selp.u16 %rs2, -1, 0, %p2; 843; CHECK-F16-NEXT: st.param.b8 [func_retval0+1], %rs2; 844; CHECK-F16-NEXT: ret; 845; 846; CHECK-NOF16-LABEL: test_fcmp_uge( 847; CHECK-NOF16: { 848; CHECK-NOF16-NEXT: .reg .pred %p<3>; 849; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 850; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 851; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 852; CHECK-NOF16-EMPTY: 853; CHECK-NOF16-NEXT: // %bb.0: 854; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_uge_param_1]; 855; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_uge_param_0]; 856; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 857; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 858; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 859; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 860; CHECK-NOF16-NEXT: setp.geu.f32 %p1, %f2, %f1; 861; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs1; 862; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs3; 863; CHECK-NOF16-NEXT: setp.geu.f32 %p2, %f4, %f3; 864; CHECK-NOF16-NEXT: selp.u16 %rs5, -1, 0, %p2; 865; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; 866; CHECK-NOF16-NEXT: selp.u16 %rs6, -1, 0, %p1; 867; CHECK-NOF16-NEXT: st.param.b8 [func_retval0+1], %rs6; 868; CHECK-NOF16-NEXT: ret; 869 %r = fcmp uge <2 x half> %a, %b 870 ret <2 x i1> %r 871} 872 873define <2 x i1> @test_fcmp_ult(<2 x half> %a, <2 x half> %b) #0 { 874; CHECK-F16-LABEL: test_fcmp_ult( 875; CHECK-F16: { 876; CHECK-F16-NEXT: .reg .pred %p<3>; 877; CHECK-F16-NEXT: .reg .b16 %rs<3>; 878; CHECK-F16-NEXT: .reg .b32 %r<3>; 879; CHECK-F16-EMPTY: 880; CHECK-F16-NEXT: // %bb.0: 881; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fcmp_ult_param_1]; 882; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fcmp_ult_param_0]; 883; CHECK-F16-NEXT: setp.ltu.f16x2 %p1|%p2, %r1, %r2; 884; CHECK-F16-NEXT: selp.u16 %rs1, -1, 0, %p1; 885; CHECK-F16-NEXT: st.param.b8 [func_retval0], %rs1; 886; CHECK-F16-NEXT: selp.u16 %rs2, -1, 0, %p2; 887; CHECK-F16-NEXT: st.param.b8 [func_retval0+1], %rs2; 888; CHECK-F16-NEXT: ret; 889; 890; CHECK-NOF16-LABEL: test_fcmp_ult( 891; CHECK-NOF16: { 892; CHECK-NOF16-NEXT: .reg .pred %p<3>; 893; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 894; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 895; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 896; CHECK-NOF16-EMPTY: 897; CHECK-NOF16-NEXT: // %bb.0: 898; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ult_param_1]; 899; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ult_param_0]; 900; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 901; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 902; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 903; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 904; CHECK-NOF16-NEXT: setp.ltu.f32 %p1, %f2, %f1; 905; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs1; 906; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs3; 907; CHECK-NOF16-NEXT: setp.ltu.f32 %p2, %f4, %f3; 908; CHECK-NOF16-NEXT: selp.u16 %rs5, -1, 0, %p2; 909; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; 910; CHECK-NOF16-NEXT: selp.u16 %rs6, -1, 0, %p1; 911; CHECK-NOF16-NEXT: st.param.b8 [func_retval0+1], %rs6; 912; CHECK-NOF16-NEXT: ret; 913 %r = fcmp ult <2 x half> %a, %b 914 ret <2 x i1> %r 915} 916 917define <2 x i1> @test_fcmp_ule(<2 x half> %a, <2 x half> %b) #0 { 918; CHECK-F16-LABEL: test_fcmp_ule( 919; CHECK-F16: { 920; CHECK-F16-NEXT: .reg .pred %p<3>; 921; CHECK-F16-NEXT: .reg .b16 %rs<3>; 922; CHECK-F16-NEXT: .reg .b32 %r<3>; 923; CHECK-F16-EMPTY: 924; CHECK-F16-NEXT: // %bb.0: 925; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fcmp_ule_param_1]; 926; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fcmp_ule_param_0]; 927; CHECK-F16-NEXT: setp.leu.f16x2 %p1|%p2, %r1, %r2; 928; CHECK-F16-NEXT: selp.u16 %rs1, -1, 0, %p1; 929; CHECK-F16-NEXT: st.param.b8 [func_retval0], %rs1; 930; CHECK-F16-NEXT: selp.u16 %rs2, -1, 0, %p2; 931; CHECK-F16-NEXT: st.param.b8 [func_retval0+1], %rs2; 932; CHECK-F16-NEXT: ret; 933; 934; CHECK-NOF16-LABEL: test_fcmp_ule( 935; CHECK-NOF16: { 936; CHECK-NOF16-NEXT: .reg .pred %p<3>; 937; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 938; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 939; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 940; CHECK-NOF16-EMPTY: 941; CHECK-NOF16-NEXT: // %bb.0: 942; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ule_param_1]; 943; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ule_param_0]; 944; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 945; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 946; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 947; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 948; CHECK-NOF16-NEXT: setp.leu.f32 %p1, %f2, %f1; 949; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs1; 950; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs3; 951; CHECK-NOF16-NEXT: setp.leu.f32 %p2, %f4, %f3; 952; CHECK-NOF16-NEXT: selp.u16 %rs5, -1, 0, %p2; 953; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; 954; CHECK-NOF16-NEXT: selp.u16 %rs6, -1, 0, %p1; 955; CHECK-NOF16-NEXT: st.param.b8 [func_retval0+1], %rs6; 956; CHECK-NOF16-NEXT: ret; 957 %r = fcmp ule <2 x half> %a, %b 958 ret <2 x i1> %r 959} 960 961 962define <2 x i1> @test_fcmp_uno(<2 x half> %a, <2 x half> %b) #0 { 963; CHECK-F16-LABEL: test_fcmp_uno( 964; CHECK-F16: { 965; CHECK-F16-NEXT: .reg .pred %p<3>; 966; CHECK-F16-NEXT: .reg .b16 %rs<3>; 967; CHECK-F16-NEXT: .reg .b32 %r<3>; 968; CHECK-F16-EMPTY: 969; CHECK-F16-NEXT: // %bb.0: 970; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fcmp_uno_param_1]; 971; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fcmp_uno_param_0]; 972; CHECK-F16-NEXT: setp.nan.f16x2 %p1|%p2, %r1, %r2; 973; CHECK-F16-NEXT: selp.u16 %rs1, -1, 0, %p1; 974; CHECK-F16-NEXT: st.param.b8 [func_retval0], %rs1; 975; CHECK-F16-NEXT: selp.u16 %rs2, -1, 0, %p2; 976; CHECK-F16-NEXT: st.param.b8 [func_retval0+1], %rs2; 977; CHECK-F16-NEXT: ret; 978; 979; CHECK-NOF16-LABEL: test_fcmp_uno( 980; CHECK-NOF16: { 981; CHECK-NOF16-NEXT: .reg .pred %p<3>; 982; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 983; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 984; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 985; CHECK-NOF16-EMPTY: 986; CHECK-NOF16-NEXT: // %bb.0: 987; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_uno_param_1]; 988; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_uno_param_0]; 989; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 990; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 991; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 992; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 993; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %f2, %f1; 994; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs1; 995; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs3; 996; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %f4, %f3; 997; CHECK-NOF16-NEXT: selp.u16 %rs5, -1, 0, %p2; 998; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; 999; CHECK-NOF16-NEXT: selp.u16 %rs6, -1, 0, %p1; 1000; CHECK-NOF16-NEXT: st.param.b8 [func_retval0+1], %rs6; 1001; CHECK-NOF16-NEXT: ret; 1002 %r = fcmp uno <2 x half> %a, %b 1003 ret <2 x i1> %r 1004} 1005 1006define <2 x i1> @test_fcmp_one(<2 x half> %a, <2 x half> %b) #0 { 1007; CHECK-F16-LABEL: test_fcmp_one( 1008; CHECK-F16: { 1009; CHECK-F16-NEXT: .reg .pred %p<3>; 1010; CHECK-F16-NEXT: .reg .b16 %rs<3>; 1011; CHECK-F16-NEXT: .reg .b32 %r<3>; 1012; CHECK-F16-EMPTY: 1013; CHECK-F16-NEXT: // %bb.0: 1014; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fcmp_one_param_1]; 1015; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fcmp_one_param_0]; 1016; CHECK-F16-NEXT: setp.ne.f16x2 %p1|%p2, %r1, %r2; 1017; CHECK-F16-NEXT: selp.u16 %rs1, -1, 0, %p1; 1018; CHECK-F16-NEXT: st.param.b8 [func_retval0], %rs1; 1019; CHECK-F16-NEXT: selp.u16 %rs2, -1, 0, %p2; 1020; CHECK-F16-NEXT: st.param.b8 [func_retval0+1], %rs2; 1021; CHECK-F16-NEXT: ret; 1022; 1023; CHECK-NOF16-LABEL: test_fcmp_one( 1024; CHECK-NOF16: { 1025; CHECK-NOF16-NEXT: .reg .pred %p<3>; 1026; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 1027; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 1028; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 1029; CHECK-NOF16-EMPTY: 1030; CHECK-NOF16-NEXT: // %bb.0: 1031; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_one_param_1]; 1032; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_one_param_0]; 1033; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 1034; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1035; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 1036; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 1037; CHECK-NOF16-NEXT: setp.ne.f32 %p1, %f2, %f1; 1038; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs1; 1039; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs3; 1040; CHECK-NOF16-NEXT: setp.ne.f32 %p2, %f4, %f3; 1041; CHECK-NOF16-NEXT: selp.u16 %rs5, -1, 0, %p2; 1042; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; 1043; CHECK-NOF16-NEXT: selp.u16 %rs6, -1, 0, %p1; 1044; CHECK-NOF16-NEXT: st.param.b8 [func_retval0+1], %rs6; 1045; CHECK-NOF16-NEXT: ret; 1046 %r = fcmp one <2 x half> %a, %b 1047 ret <2 x i1> %r 1048} 1049 1050define <2 x i1> @test_fcmp_oeq(<2 x half> %a, <2 x half> %b) #0 { 1051; CHECK-F16-LABEL: test_fcmp_oeq( 1052; CHECK-F16: { 1053; CHECK-F16-NEXT: .reg .pred %p<3>; 1054; CHECK-F16-NEXT: .reg .b16 %rs<3>; 1055; CHECK-F16-NEXT: .reg .b32 %r<3>; 1056; CHECK-F16-EMPTY: 1057; CHECK-F16-NEXT: // %bb.0: 1058; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fcmp_oeq_param_1]; 1059; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fcmp_oeq_param_0]; 1060; CHECK-F16-NEXT: setp.eq.f16x2 %p1|%p2, %r1, %r2; 1061; CHECK-F16-NEXT: selp.u16 %rs1, -1, 0, %p1; 1062; CHECK-F16-NEXT: st.param.b8 [func_retval0], %rs1; 1063; CHECK-F16-NEXT: selp.u16 %rs2, -1, 0, %p2; 1064; CHECK-F16-NEXT: st.param.b8 [func_retval0+1], %rs2; 1065; CHECK-F16-NEXT: ret; 1066; 1067; CHECK-NOF16-LABEL: test_fcmp_oeq( 1068; CHECK-NOF16: { 1069; CHECK-NOF16-NEXT: .reg .pred %p<3>; 1070; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 1071; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 1072; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 1073; CHECK-NOF16-EMPTY: 1074; CHECK-NOF16-NEXT: // %bb.0: 1075; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_oeq_param_1]; 1076; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_oeq_param_0]; 1077; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 1078; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1079; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 1080; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 1081; CHECK-NOF16-NEXT: setp.eq.f32 %p1, %f2, %f1; 1082; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs1; 1083; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs3; 1084; CHECK-NOF16-NEXT: setp.eq.f32 %p2, %f4, %f3; 1085; CHECK-NOF16-NEXT: selp.u16 %rs5, -1, 0, %p2; 1086; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; 1087; CHECK-NOF16-NEXT: selp.u16 %rs6, -1, 0, %p1; 1088; CHECK-NOF16-NEXT: st.param.b8 [func_retval0+1], %rs6; 1089; CHECK-NOF16-NEXT: ret; 1090 %r = fcmp oeq <2 x half> %a, %b 1091 ret <2 x i1> %r 1092} 1093 1094define <2 x i1> @test_fcmp_ogt(<2 x half> %a, <2 x half> %b) #0 { 1095; CHECK-F16-LABEL: test_fcmp_ogt( 1096; CHECK-F16: { 1097; CHECK-F16-NEXT: .reg .pred %p<3>; 1098; CHECK-F16-NEXT: .reg .b16 %rs<3>; 1099; CHECK-F16-NEXT: .reg .b32 %r<3>; 1100; CHECK-F16-EMPTY: 1101; CHECK-F16-NEXT: // %bb.0: 1102; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fcmp_ogt_param_1]; 1103; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fcmp_ogt_param_0]; 1104; CHECK-F16-NEXT: setp.gt.f16x2 %p1|%p2, %r1, %r2; 1105; CHECK-F16-NEXT: selp.u16 %rs1, -1, 0, %p1; 1106; CHECK-F16-NEXT: st.param.b8 [func_retval0], %rs1; 1107; CHECK-F16-NEXT: selp.u16 %rs2, -1, 0, %p2; 1108; CHECK-F16-NEXT: st.param.b8 [func_retval0+1], %rs2; 1109; CHECK-F16-NEXT: ret; 1110; 1111; CHECK-NOF16-LABEL: test_fcmp_ogt( 1112; CHECK-NOF16: { 1113; CHECK-NOF16-NEXT: .reg .pred %p<3>; 1114; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 1115; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 1116; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 1117; CHECK-NOF16-EMPTY: 1118; CHECK-NOF16-NEXT: // %bb.0: 1119; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ogt_param_1]; 1120; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ogt_param_0]; 1121; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 1122; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1123; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 1124; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 1125; CHECK-NOF16-NEXT: setp.gt.f32 %p1, %f2, %f1; 1126; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs1; 1127; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs3; 1128; CHECK-NOF16-NEXT: setp.gt.f32 %p2, %f4, %f3; 1129; CHECK-NOF16-NEXT: selp.u16 %rs5, -1, 0, %p2; 1130; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; 1131; CHECK-NOF16-NEXT: selp.u16 %rs6, -1, 0, %p1; 1132; CHECK-NOF16-NEXT: st.param.b8 [func_retval0+1], %rs6; 1133; CHECK-NOF16-NEXT: ret; 1134 %r = fcmp ogt <2 x half> %a, %b 1135 ret <2 x i1> %r 1136} 1137 1138define <2 x i1> @test_fcmp_oge(<2 x half> %a, <2 x half> %b) #0 { 1139; CHECK-F16-LABEL: test_fcmp_oge( 1140; CHECK-F16: { 1141; CHECK-F16-NEXT: .reg .pred %p<3>; 1142; CHECK-F16-NEXT: .reg .b16 %rs<3>; 1143; CHECK-F16-NEXT: .reg .b32 %r<3>; 1144; CHECK-F16-EMPTY: 1145; CHECK-F16-NEXT: // %bb.0: 1146; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fcmp_oge_param_1]; 1147; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fcmp_oge_param_0]; 1148; CHECK-F16-NEXT: setp.ge.f16x2 %p1|%p2, %r1, %r2; 1149; CHECK-F16-NEXT: selp.u16 %rs1, -1, 0, %p1; 1150; CHECK-F16-NEXT: st.param.b8 [func_retval0], %rs1; 1151; CHECK-F16-NEXT: selp.u16 %rs2, -1, 0, %p2; 1152; CHECK-F16-NEXT: st.param.b8 [func_retval0+1], %rs2; 1153; CHECK-F16-NEXT: ret; 1154; 1155; CHECK-NOF16-LABEL: test_fcmp_oge( 1156; CHECK-NOF16: { 1157; CHECK-NOF16-NEXT: .reg .pred %p<3>; 1158; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 1159; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 1160; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 1161; CHECK-NOF16-EMPTY: 1162; CHECK-NOF16-NEXT: // %bb.0: 1163; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_oge_param_1]; 1164; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_oge_param_0]; 1165; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 1166; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1167; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 1168; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 1169; CHECK-NOF16-NEXT: setp.ge.f32 %p1, %f2, %f1; 1170; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs1; 1171; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs3; 1172; CHECK-NOF16-NEXT: setp.ge.f32 %p2, %f4, %f3; 1173; CHECK-NOF16-NEXT: selp.u16 %rs5, -1, 0, %p2; 1174; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; 1175; CHECK-NOF16-NEXT: selp.u16 %rs6, -1, 0, %p1; 1176; CHECK-NOF16-NEXT: st.param.b8 [func_retval0+1], %rs6; 1177; CHECK-NOF16-NEXT: ret; 1178 %r = fcmp oge <2 x half> %a, %b 1179 ret <2 x i1> %r 1180} 1181 1182define <2 x i1> @test_fcmp_olt(<2 x half> %a, <2 x half> %b) #0 { 1183; CHECK-F16-LABEL: test_fcmp_olt( 1184; CHECK-F16: { 1185; CHECK-F16-NEXT: .reg .pred %p<3>; 1186; CHECK-F16-NEXT: .reg .b16 %rs<3>; 1187; CHECK-F16-NEXT: .reg .b32 %r<3>; 1188; CHECK-F16-EMPTY: 1189; CHECK-F16-NEXT: // %bb.0: 1190; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fcmp_olt_param_1]; 1191; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fcmp_olt_param_0]; 1192; CHECK-F16-NEXT: setp.lt.f16x2 %p1|%p2, %r1, %r2; 1193; CHECK-F16-NEXT: selp.u16 %rs1, -1, 0, %p1; 1194; CHECK-F16-NEXT: st.param.b8 [func_retval0], %rs1; 1195; CHECK-F16-NEXT: selp.u16 %rs2, -1, 0, %p2; 1196; CHECK-F16-NEXT: st.param.b8 [func_retval0+1], %rs2; 1197; CHECK-F16-NEXT: ret; 1198; 1199; CHECK-NOF16-LABEL: test_fcmp_olt( 1200; CHECK-NOF16: { 1201; CHECK-NOF16-NEXT: .reg .pred %p<3>; 1202; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 1203; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 1204; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 1205; CHECK-NOF16-EMPTY: 1206; CHECK-NOF16-NEXT: // %bb.0: 1207; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_olt_param_1]; 1208; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_olt_param_0]; 1209; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 1210; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1211; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 1212; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 1213; CHECK-NOF16-NEXT: setp.lt.f32 %p1, %f2, %f1; 1214; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs1; 1215; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs3; 1216; CHECK-NOF16-NEXT: setp.lt.f32 %p2, %f4, %f3; 1217; CHECK-NOF16-NEXT: selp.u16 %rs5, -1, 0, %p2; 1218; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; 1219; CHECK-NOF16-NEXT: selp.u16 %rs6, -1, 0, %p1; 1220; CHECK-NOF16-NEXT: st.param.b8 [func_retval0+1], %rs6; 1221; CHECK-NOF16-NEXT: ret; 1222 %r = fcmp olt <2 x half> %a, %b 1223 ret <2 x i1> %r 1224} 1225 1226define <2 x i1> @test_fcmp_ole(<2 x half> %a, <2 x half> %b) #0 { 1227; CHECK-F16-LABEL: test_fcmp_ole( 1228; CHECK-F16: { 1229; CHECK-F16-NEXT: .reg .pred %p<3>; 1230; CHECK-F16-NEXT: .reg .b16 %rs<3>; 1231; CHECK-F16-NEXT: .reg .b32 %r<3>; 1232; CHECK-F16-EMPTY: 1233; CHECK-F16-NEXT: // %bb.0: 1234; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fcmp_ole_param_1]; 1235; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fcmp_ole_param_0]; 1236; CHECK-F16-NEXT: setp.le.f16x2 %p1|%p2, %r1, %r2; 1237; CHECK-F16-NEXT: selp.u16 %rs1, -1, 0, %p1; 1238; CHECK-F16-NEXT: st.param.b8 [func_retval0], %rs1; 1239; CHECK-F16-NEXT: selp.u16 %rs2, -1, 0, %p2; 1240; CHECK-F16-NEXT: st.param.b8 [func_retval0+1], %rs2; 1241; CHECK-F16-NEXT: ret; 1242; 1243; CHECK-NOF16-LABEL: test_fcmp_ole( 1244; CHECK-NOF16: { 1245; CHECK-NOF16-NEXT: .reg .pred %p<3>; 1246; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 1247; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 1248; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 1249; CHECK-NOF16-EMPTY: 1250; CHECK-NOF16-NEXT: // %bb.0: 1251; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ole_param_1]; 1252; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ole_param_0]; 1253; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 1254; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1255; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 1256; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 1257; CHECK-NOF16-NEXT: setp.le.f32 %p1, %f2, %f1; 1258; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs1; 1259; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs3; 1260; CHECK-NOF16-NEXT: setp.le.f32 %p2, %f4, %f3; 1261; CHECK-NOF16-NEXT: selp.u16 %rs5, -1, 0, %p2; 1262; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; 1263; CHECK-NOF16-NEXT: selp.u16 %rs6, -1, 0, %p1; 1264; CHECK-NOF16-NEXT: st.param.b8 [func_retval0+1], %rs6; 1265; CHECK-NOF16-NEXT: ret; 1266 %r = fcmp ole <2 x half> %a, %b 1267 ret <2 x i1> %r 1268} 1269 1270define <2 x i1> @test_fcmp_ord(<2 x half> %a, <2 x half> %b) #0 { 1271; CHECK-F16-LABEL: test_fcmp_ord( 1272; CHECK-F16: { 1273; CHECK-F16-NEXT: .reg .pred %p<3>; 1274; CHECK-F16-NEXT: .reg .b16 %rs<3>; 1275; CHECK-F16-NEXT: .reg .b32 %r<3>; 1276; CHECK-F16-EMPTY: 1277; CHECK-F16-NEXT: // %bb.0: 1278; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fcmp_ord_param_1]; 1279; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fcmp_ord_param_0]; 1280; CHECK-F16-NEXT: setp.num.f16x2 %p1|%p2, %r1, %r2; 1281; CHECK-F16-NEXT: selp.u16 %rs1, -1, 0, %p1; 1282; CHECK-F16-NEXT: st.param.b8 [func_retval0], %rs1; 1283; CHECK-F16-NEXT: selp.u16 %rs2, -1, 0, %p2; 1284; CHECK-F16-NEXT: st.param.b8 [func_retval0+1], %rs2; 1285; CHECK-F16-NEXT: ret; 1286; 1287; CHECK-NOF16-LABEL: test_fcmp_ord( 1288; CHECK-NOF16: { 1289; CHECK-NOF16-NEXT: .reg .pred %p<3>; 1290; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 1291; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 1292; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 1293; CHECK-NOF16-EMPTY: 1294; CHECK-NOF16-NEXT: // %bb.0: 1295; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ord_param_1]; 1296; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ord_param_0]; 1297; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 1298; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1299; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 1300; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 1301; CHECK-NOF16-NEXT: setp.num.f32 %p1, %f2, %f1; 1302; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs1; 1303; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs3; 1304; CHECK-NOF16-NEXT: setp.num.f32 %p2, %f4, %f3; 1305; CHECK-NOF16-NEXT: selp.u16 %rs5, -1, 0, %p2; 1306; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; 1307; CHECK-NOF16-NEXT: selp.u16 %rs6, -1, 0, %p1; 1308; CHECK-NOF16-NEXT: st.param.b8 [func_retval0+1], %rs6; 1309; CHECK-NOF16-NEXT: ret; 1310 %r = fcmp ord <2 x half> %a, %b 1311 ret <2 x i1> %r 1312} 1313 1314define <2 x i32> @test_fptosi_i32(<2 x half> %a) #0 { 1315; CHECK-LABEL: test_fptosi_i32( 1316; CHECK: { 1317; CHECK-NEXT: .reg .b16 %rs<3>; 1318; CHECK-NEXT: .reg .b32 %r<4>; 1319; CHECK-EMPTY: 1320; CHECK-NEXT: // %bb.0: 1321; CHECK-NEXT: ld.param.b32 %r1, [test_fptosi_i32_param_0]; 1322; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 1323; CHECK-NEXT: cvt.rzi.s32.f16 %r2, %rs2; 1324; CHECK-NEXT: cvt.rzi.s32.f16 %r3, %rs1; 1325; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2}; 1326; CHECK-NEXT: ret; 1327 %r = fptosi <2 x half> %a to <2 x i32> 1328 ret <2 x i32> %r 1329} 1330 1331define <2 x i64> @test_fptosi_i64(<2 x half> %a) #0 { 1332; CHECK-LABEL: test_fptosi_i64( 1333; CHECK: { 1334; CHECK-NEXT: .reg .b16 %rs<3>; 1335; CHECK-NEXT: .reg .b32 %r<2>; 1336; CHECK-NEXT: .reg .b64 %rd<3>; 1337; CHECK-EMPTY: 1338; CHECK-NEXT: // %bb.0: 1339; CHECK-NEXT: ld.param.b32 %r1, [test_fptosi_i64_param_0]; 1340; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 1341; CHECK-NEXT: cvt.rzi.s64.f16 %rd1, %rs2; 1342; CHECK-NEXT: cvt.rzi.s64.f16 %rd2, %rs1; 1343; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd2, %rd1}; 1344; CHECK-NEXT: ret; 1345 %r = fptosi <2 x half> %a to <2 x i64> 1346 ret <2 x i64> %r 1347} 1348 1349define <2 x i32> @test_fptoui_2xi32(<2 x half> %a) #0 { 1350; CHECK-LABEL: test_fptoui_2xi32( 1351; CHECK: { 1352; CHECK-NEXT: .reg .b16 %rs<3>; 1353; CHECK-NEXT: .reg .b32 %r<4>; 1354; CHECK-EMPTY: 1355; CHECK-NEXT: // %bb.0: 1356; CHECK-NEXT: ld.param.b32 %r1, [test_fptoui_2xi32_param_0]; 1357; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 1358; CHECK-NEXT: cvt.rzi.u32.f16 %r2, %rs2; 1359; CHECK-NEXT: cvt.rzi.u32.f16 %r3, %rs1; 1360; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2}; 1361; CHECK-NEXT: ret; 1362 %r = fptoui <2 x half> %a to <2 x i32> 1363 ret <2 x i32> %r 1364} 1365 1366define <2 x i64> @test_fptoui_2xi64(<2 x half> %a) #0 { 1367; CHECK-LABEL: test_fptoui_2xi64( 1368; CHECK: { 1369; CHECK-NEXT: .reg .b16 %rs<3>; 1370; CHECK-NEXT: .reg .b32 %r<2>; 1371; CHECK-NEXT: .reg .b64 %rd<3>; 1372; CHECK-EMPTY: 1373; CHECK-NEXT: // %bb.0: 1374; CHECK-NEXT: ld.param.b32 %r1, [test_fptoui_2xi64_param_0]; 1375; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 1376; CHECK-NEXT: cvt.rzi.u64.f16 %rd1, %rs2; 1377; CHECK-NEXT: cvt.rzi.u64.f16 %rd2, %rs1; 1378; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd2, %rd1}; 1379; CHECK-NEXT: ret; 1380 %r = fptoui <2 x half> %a to <2 x i64> 1381 ret <2 x i64> %r 1382} 1383 1384define <2 x half> @test_uitofp_2xi32(<2 x i32> %a) #0 { 1385; CHECK-LABEL: test_uitofp_2xi32( 1386; CHECK: { 1387; CHECK-NEXT: .reg .b16 %rs<3>; 1388; CHECK-NEXT: .reg .b32 %r<4>; 1389; CHECK-EMPTY: 1390; CHECK-NEXT: // %bb.0: 1391; CHECK-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_uitofp_2xi32_param_0]; 1392; CHECK-NEXT: cvt.rn.f16.u32 %rs1, %r2; 1393; CHECK-NEXT: cvt.rn.f16.u32 %rs2, %r1; 1394; CHECK-NEXT: mov.b32 %r3, {%rs2, %rs1}; 1395; CHECK-NEXT: st.param.b32 [func_retval0], %r3; 1396; CHECK-NEXT: ret; 1397 %r = uitofp <2 x i32> %a to <2 x half> 1398 ret <2 x half> %r 1399} 1400 1401define <2 x half> @test_uitofp_2xi64(<2 x i64> %a) #0 { 1402; CHECK-LABEL: test_uitofp_2xi64( 1403; CHECK: { 1404; CHECK-NEXT: .reg .b16 %rs<3>; 1405; CHECK-NEXT: .reg .b32 %r<2>; 1406; CHECK-NEXT: .reg .b64 %rd<3>; 1407; CHECK-EMPTY: 1408; CHECK-NEXT: // %bb.0: 1409; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [test_uitofp_2xi64_param_0]; 1410; CHECK-NEXT: cvt.rn.f16.u64 %rs1, %rd2; 1411; CHECK-NEXT: cvt.rn.f16.u64 %rs2, %rd1; 1412; CHECK-NEXT: mov.b32 %r1, {%rs2, %rs1}; 1413; CHECK-NEXT: st.param.b32 [func_retval0], %r1; 1414; CHECK-NEXT: ret; 1415 %r = uitofp <2 x i64> %a to <2 x half> 1416 ret <2 x half> %r 1417} 1418 1419define <2 x half> @test_sitofp_2xi32(<2 x i32> %a) #0 { 1420; CHECK-LABEL: test_sitofp_2xi32( 1421; CHECK: { 1422; CHECK-NEXT: .reg .b16 %rs<3>; 1423; CHECK-NEXT: .reg .b32 %r<4>; 1424; CHECK-EMPTY: 1425; CHECK-NEXT: // %bb.0: 1426; CHECK-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_sitofp_2xi32_param_0]; 1427; CHECK-NEXT: cvt.rn.f16.s32 %rs1, %r2; 1428; CHECK-NEXT: cvt.rn.f16.s32 %rs2, %r1; 1429; CHECK-NEXT: mov.b32 %r3, {%rs2, %rs1}; 1430; CHECK-NEXT: st.param.b32 [func_retval0], %r3; 1431; CHECK-NEXT: ret; 1432 %r = sitofp <2 x i32> %a to <2 x half> 1433 ret <2 x half> %r 1434} 1435 1436define <2 x half> @test_sitofp_2xi64(<2 x i64> %a) #0 { 1437; CHECK-LABEL: test_sitofp_2xi64( 1438; CHECK: { 1439; CHECK-NEXT: .reg .b16 %rs<3>; 1440; CHECK-NEXT: .reg .b32 %r<2>; 1441; CHECK-NEXT: .reg .b64 %rd<3>; 1442; CHECK-EMPTY: 1443; CHECK-NEXT: // %bb.0: 1444; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [test_sitofp_2xi64_param_0]; 1445; CHECK-NEXT: cvt.rn.f16.s64 %rs1, %rd2; 1446; CHECK-NEXT: cvt.rn.f16.s64 %rs2, %rd1; 1447; CHECK-NEXT: mov.b32 %r1, {%rs2, %rs1}; 1448; CHECK-NEXT: st.param.b32 [func_retval0], %r1; 1449; CHECK-NEXT: ret; 1450 %r = sitofp <2 x i64> %a to <2 x half> 1451 ret <2 x half> %r 1452} 1453 1454 1455define <2 x half> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { 1456; CHECK-F16-LABEL: test_uitofp_2xi32_fadd( 1457; CHECK-F16: { 1458; CHECK-F16-NEXT: .reg .b16 %rs<3>; 1459; CHECK-F16-NEXT: .reg .b32 %r<6>; 1460; CHECK-F16-EMPTY: 1461; CHECK-F16-NEXT: // %bb.0: 1462; CHECK-F16-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_uitofp_2xi32_fadd_param_0]; 1463; CHECK-F16-NEXT: ld.param.b32 %r3, [test_uitofp_2xi32_fadd_param_1]; 1464; CHECK-F16-NEXT: cvt.rn.f16.u32 %rs1, %r2; 1465; CHECK-F16-NEXT: cvt.rn.f16.u32 %rs2, %r1; 1466; CHECK-F16-NEXT: mov.b32 %r4, {%rs2, %rs1}; 1467; CHECK-F16-NEXT: add.rn.f16x2 %r5, %r3, %r4; 1468; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r5; 1469; CHECK-F16-NEXT: ret; 1470; 1471; CHECK-NOF16-LABEL: test_uitofp_2xi32_fadd( 1472; CHECK-NOF16: { 1473; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 1474; CHECK-NOF16-NEXT: .reg .b32 %r<5>; 1475; CHECK-NOF16-NEXT: .reg .f32 %f<7>; 1476; CHECK-NOF16-EMPTY: 1477; CHECK-NOF16-NEXT: // %bb.0: 1478; CHECK-NOF16-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_uitofp_2xi32_fadd_param_0]; 1479; CHECK-NOF16-NEXT: ld.param.b32 %r3, [test_uitofp_2xi32_fadd_param_1]; 1480; CHECK-NOF16-NEXT: cvt.rn.f16.u32 %rs1, %r1; 1481; CHECK-NOF16-NEXT: cvt.rn.f16.u32 %rs2, %r2; 1482; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1483; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r3; 1484; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 1485; CHECK-NOF16-NEXT: add.rn.f32 %f3, %f2, %f1; 1486; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %f3; 1487; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; 1488; CHECK-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; 1489; CHECK-NOF16-NEXT: add.rn.f32 %f6, %f5, %f4; 1490; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %f6; 1491; CHECK-NOF16-NEXT: mov.b32 %r4, {%rs6, %rs5}; 1492; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4; 1493; CHECK-NOF16-NEXT: ret; 1494 %c = uitofp <2 x i32> %a to <2 x half> 1495 %r = fadd <2 x half> %b, %c 1496 ret <2 x half> %r 1497} 1498 1499define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { 1500; CHECK-F16-LABEL: test_sitofp_2xi32_fadd( 1501; CHECK-F16: { 1502; CHECK-F16-NEXT: .reg .b16 %rs<3>; 1503; CHECK-F16-NEXT: .reg .b32 %r<6>; 1504; CHECK-F16-EMPTY: 1505; CHECK-F16-NEXT: // %bb.0: 1506; CHECK-F16-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_sitofp_2xi32_fadd_param_0]; 1507; CHECK-F16-NEXT: ld.param.b32 %r3, [test_sitofp_2xi32_fadd_param_1]; 1508; CHECK-F16-NEXT: cvt.rn.f16.s32 %rs1, %r2; 1509; CHECK-F16-NEXT: cvt.rn.f16.s32 %rs2, %r1; 1510; CHECK-F16-NEXT: mov.b32 %r4, {%rs2, %rs1}; 1511; CHECK-F16-NEXT: add.rn.f16x2 %r5, %r3, %r4; 1512; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r5; 1513; CHECK-F16-NEXT: ret; 1514; 1515; CHECK-NOF16-LABEL: test_sitofp_2xi32_fadd( 1516; CHECK-NOF16: { 1517; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 1518; CHECK-NOF16-NEXT: .reg .b32 %r<5>; 1519; CHECK-NOF16-NEXT: .reg .f32 %f<7>; 1520; CHECK-NOF16-EMPTY: 1521; CHECK-NOF16-NEXT: // %bb.0: 1522; CHECK-NOF16-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_sitofp_2xi32_fadd_param_0]; 1523; CHECK-NOF16-NEXT: ld.param.b32 %r3, [test_sitofp_2xi32_fadd_param_1]; 1524; CHECK-NOF16-NEXT: cvt.rn.f16.s32 %rs1, %r1; 1525; CHECK-NOF16-NEXT: cvt.rn.f16.s32 %rs2, %r2; 1526; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1527; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r3; 1528; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 1529; CHECK-NOF16-NEXT: add.rn.f32 %f3, %f2, %f1; 1530; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %f3; 1531; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; 1532; CHECK-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; 1533; CHECK-NOF16-NEXT: add.rn.f32 %f6, %f5, %f4; 1534; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %f6; 1535; CHECK-NOF16-NEXT: mov.b32 %r4, {%rs6, %rs5}; 1536; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4; 1537; CHECK-NOF16-NEXT: ret; 1538 %c = sitofp <2 x i32> %a to <2 x half> 1539 %r = fadd <2 x half> %b, %c 1540 ret <2 x half> %r 1541} 1542 1543define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 { 1544; CHECK-LABEL: test_fptrunc_2xfloat( 1545; CHECK: { 1546; CHECK-NEXT: .reg .b16 %rs<3>; 1547; CHECK-NEXT: .reg .b32 %r<2>; 1548; CHECK-NEXT: .reg .f32 %f<3>; 1549; CHECK-EMPTY: 1550; CHECK-NEXT: // %bb.0: 1551; CHECK-NEXT: ld.param.v2.f32 {%f1, %f2}, [test_fptrunc_2xfloat_param_0]; 1552; CHECK-NEXT: cvt.rn.f16.f32 %rs1, %f2; 1553; CHECK-NEXT: cvt.rn.f16.f32 %rs2, %f1; 1554; CHECK-NEXT: mov.b32 %r1, {%rs2, %rs1}; 1555; CHECK-NEXT: st.param.b32 [func_retval0], %r1; 1556; CHECK-NEXT: ret; 1557 %r = fptrunc <2 x float> %a to <2 x half> 1558 ret <2 x half> %r 1559} 1560 1561define <2 x half> @test_fptrunc_2xdouble(<2 x double> %a) #0 { 1562; CHECK-LABEL: test_fptrunc_2xdouble( 1563; CHECK: { 1564; CHECK-NEXT: .reg .b16 %rs<3>; 1565; CHECK-NEXT: .reg .b32 %r<2>; 1566; CHECK-NEXT: .reg .f64 %fd<3>; 1567; CHECK-EMPTY: 1568; CHECK-NEXT: // %bb.0: 1569; CHECK-NEXT: ld.param.v2.f64 {%fd1, %fd2}, [test_fptrunc_2xdouble_param_0]; 1570; CHECK-NEXT: cvt.rn.f16.f64 %rs1, %fd2; 1571; CHECK-NEXT: cvt.rn.f16.f64 %rs2, %fd1; 1572; CHECK-NEXT: mov.b32 %r1, {%rs2, %rs1}; 1573; CHECK-NEXT: st.param.b32 [func_retval0], %r1; 1574; CHECK-NEXT: ret; 1575 %r = fptrunc <2 x double> %a to <2 x half> 1576 ret <2 x half> %r 1577} 1578 1579define <2 x float> @test_fpext_2xfloat(<2 x half> %a) #0 { 1580; CHECK-LABEL: test_fpext_2xfloat( 1581; CHECK: { 1582; CHECK-NEXT: .reg .b16 %rs<3>; 1583; CHECK-NEXT: .reg .b32 %r<2>; 1584; CHECK-NEXT: .reg .f32 %f<3>; 1585; CHECK-EMPTY: 1586; CHECK-NEXT: // %bb.0: 1587; CHECK-NEXT: ld.param.b32 %r1, [test_fpext_2xfloat_param_0]; 1588; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 1589; CHECK-NEXT: cvt.f32.f16 %f1, %rs2; 1590; CHECK-NEXT: cvt.f32.f16 %f2, %rs1; 1591; CHECK-NEXT: st.param.v2.f32 [func_retval0], {%f2, %f1}; 1592; CHECK-NEXT: ret; 1593 %r = fpext <2 x half> %a to <2 x float> 1594 ret <2 x float> %r 1595} 1596 1597define <2 x double> @test_fpext_2xdouble(<2 x half> %a) #0 { 1598; CHECK-LABEL: test_fpext_2xdouble( 1599; CHECK: { 1600; CHECK-NEXT: .reg .b16 %rs<3>; 1601; CHECK-NEXT: .reg .b32 %r<2>; 1602; CHECK-NEXT: .reg .f64 %fd<3>; 1603; CHECK-EMPTY: 1604; CHECK-NEXT: // %bb.0: 1605; CHECK-NEXT: ld.param.b32 %r1, [test_fpext_2xdouble_param_0]; 1606; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 1607; CHECK-NEXT: cvt.f64.f16 %fd1, %rs2; 1608; CHECK-NEXT: cvt.f64.f16 %fd2, %rs1; 1609; CHECK-NEXT: st.param.v2.f64 [func_retval0], {%fd2, %fd1}; 1610; CHECK-NEXT: ret; 1611 %r = fpext <2 x half> %a to <2 x double> 1612 ret <2 x double> %r 1613} 1614 1615 1616define <2 x i16> @test_bitcast_2xhalf_to_2xi16(<2 x half> %a) #0 { 1617; CHECK-LABEL: test_bitcast_2xhalf_to_2xi16( 1618; CHECK: { 1619; CHECK-NEXT: .reg .b32 %r<2>; 1620; CHECK-EMPTY: 1621; CHECK-NEXT: // %bb.0: 1622; CHECK-NEXT: ld.param.u32 %r1, [test_bitcast_2xhalf_to_2xi16_param_0]; 1623; CHECK-NEXT: st.param.b32 [func_retval0], %r1; 1624; CHECK-NEXT: ret; 1625 %r = bitcast <2 x half> %a to <2 x i16> 1626 ret <2 x i16> %r 1627} 1628 1629define <2 x half> @test_bitcast_2xi16_to_2xhalf(<2 x i16> %a) #0 { 1630; CHECK-LABEL: test_bitcast_2xi16_to_2xhalf( 1631; CHECK: { 1632; CHECK-NEXT: .reg .b32 %r<2>; 1633; CHECK-EMPTY: 1634; CHECK-NEXT: // %bb.0: 1635; CHECK-NEXT: ld.param.u32 %r1, [test_bitcast_2xi16_to_2xhalf_param_0]; 1636; CHECK-NEXT: st.param.b32 [func_retval0], %r1; 1637; CHECK-NEXT: ret; 1638 %r = bitcast <2 x i16> %a to <2 x half> 1639 ret <2 x half> %r 1640} 1641 1642define <2 x half> @test_bitcast_float_to_2xhalf(float %a) #0 { 1643; CHECK-LABEL: test_bitcast_float_to_2xhalf( 1644; CHECK: { 1645; CHECK-NEXT: .reg .b32 %r<2>; 1646; CHECK-NEXT: .reg .f32 %f<2>; 1647; CHECK-EMPTY: 1648; CHECK-NEXT: // %bb.0: 1649; CHECK-NEXT: ld.param.f32 %f1, [test_bitcast_float_to_2xhalf_param_0]; 1650; CHECK-NEXT: mov.b32 %r1, %f1; 1651; CHECK-NEXT: st.param.b32 [func_retval0], %r1; 1652; CHECK-NEXT: ret; 1653 %r = bitcast float %a to <2 x half> 1654 ret <2 x half> %r 1655} 1656 1657define float @test_bitcast_2xhalf_to_float(<2 x half> %a) #0 { 1658; CHECK-LABEL: test_bitcast_2xhalf_to_float( 1659; CHECK: { 1660; CHECK-NEXT: .reg .b32 %r<2>; 1661; CHECK-NEXT: .reg .f32 %f<2>; 1662; CHECK-EMPTY: 1663; CHECK-NEXT: // %bb.0: 1664; CHECK-NEXT: ld.param.u32 %r1, [test_bitcast_2xhalf_to_float_param_0]; 1665; CHECK-NEXT: mov.b32 %f1, %r1; 1666; CHECK-NEXT: st.param.f32 [func_retval0], %f1; 1667; CHECK-NEXT: ret; 1668 %r = bitcast <2 x half> %a to float 1669 ret float %r 1670} 1671 1672declare <2 x half> @llvm.sqrt.f16(<2 x half> %a) #0 1673declare <2 x half> @llvm.powi.f16.i32(<2 x half> %a, <2 x i32> %b) #0 1674declare <2 x half> @llvm.sin.f16(<2 x half> %a) #0 1675declare <2 x half> @llvm.cos.f16(<2 x half> %a) #0 1676declare <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b) #0 1677declare <2 x half> @llvm.exp.f16(<2 x half> %a) #0 1678declare <2 x half> @llvm.exp2.f16(<2 x half> %a) #0 1679declare <2 x half> @llvm.log.f16(<2 x half> %a) #0 1680declare <2 x half> @llvm.log10.f16(<2 x half> %a) #0 1681declare <2 x half> @llvm.log2.f16(<2 x half> %a) #0 1682declare <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 1683declare <2 x half> @llvm.fabs.f16(<2 x half> %a) #0 1684declare <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b) #0 1685declare <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b) #0 1686declare <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) #0 1687declare <2 x half> @llvm.floor.f16(<2 x half> %a) #0 1688declare <2 x half> @llvm.ceil.f16(<2 x half> %a) #0 1689declare <2 x half> @llvm.trunc.f16(<2 x half> %a) #0 1690declare <2 x half> @llvm.rint.f16(<2 x half> %a) #0 1691declare <2 x half> @llvm.nearbyint.f16(<2 x half> %a) #0 1692declare <2 x half> @llvm.round.f16(<2 x half> %a) #0 1693declare <2 x half> @llvm.roundeven.f16(<2 x half> %a) #0 1694declare <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 1695 1696define <2 x half> @test_sqrt(<2 x half> %a) #0 { 1697; CHECK-LABEL: test_sqrt( 1698; CHECK: { 1699; CHECK-NEXT: .reg .b16 %rs<5>; 1700; CHECK-NEXT: .reg .b32 %r<3>; 1701; CHECK-NEXT: .reg .f32 %f<5>; 1702; CHECK-EMPTY: 1703; CHECK-NEXT: // %bb.0: 1704; CHECK-NEXT: ld.param.b32 %r1, [test_sqrt_param_0]; 1705; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 1706; CHECK-NEXT: cvt.f32.f16 %f1, %rs2; 1707; CHECK-NEXT: sqrt.rn.f32 %f2, %f1; 1708; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %f2; 1709; CHECK-NEXT: cvt.f32.f16 %f3, %rs1; 1710; CHECK-NEXT: sqrt.rn.f32 %f4, %f3; 1711; CHECK-NEXT: cvt.rn.f16.f32 %rs4, %f4; 1712; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; 1713; CHECK-NEXT: st.param.b32 [func_retval0], %r2; 1714; CHECK-NEXT: ret; 1715 %r = call <2 x half> @llvm.sqrt.f16(<2 x half> %a) 1716 ret <2 x half> %r 1717} 1718 1719;;; Can't do this yet: requires libcall. 1720; XCHECK-LABEL: test_powi( 1721;define <2 x half> @test_powi(<2 x half> %a, <2 x i32> %b) #0 { 1722; %r = call <2 x half> @llvm.powi.f16.i32(<2 x half> %a, <2 x i32> %b) 1723; ret <2 x half> %r 1724;} 1725 1726define <2 x half> @test_sin(<2 x half> %a) #0 #1 { 1727; CHECK-LABEL: test_sin( 1728; CHECK: { 1729; CHECK-NEXT: .reg .b16 %rs<5>; 1730; CHECK-NEXT: .reg .b32 %r<3>; 1731; CHECK-NEXT: .reg .f32 %f<5>; 1732; CHECK-EMPTY: 1733; CHECK-NEXT: // %bb.0: 1734; CHECK-NEXT: ld.param.b32 %r1, [test_sin_param_0]; 1735; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 1736; CHECK-NEXT: cvt.f32.f16 %f1, %rs2; 1737; CHECK-NEXT: sin.approx.f32 %f2, %f1; 1738; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %f2; 1739; CHECK-NEXT: cvt.f32.f16 %f3, %rs1; 1740; CHECK-NEXT: sin.approx.f32 %f4, %f3; 1741; CHECK-NEXT: cvt.rn.f16.f32 %rs4, %f4; 1742; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; 1743; CHECK-NEXT: st.param.b32 [func_retval0], %r2; 1744; CHECK-NEXT: ret; 1745 %r = call <2 x half> @llvm.sin.f16(<2 x half> %a) 1746 ret <2 x half> %r 1747} 1748 1749define <2 x half> @test_cos(<2 x half> %a) #0 #1 { 1750; CHECK-LABEL: test_cos( 1751; CHECK: { 1752; CHECK-NEXT: .reg .b16 %rs<5>; 1753; CHECK-NEXT: .reg .b32 %r<3>; 1754; CHECK-NEXT: .reg .f32 %f<5>; 1755; CHECK-EMPTY: 1756; CHECK-NEXT: // %bb.0: 1757; CHECK-NEXT: ld.param.b32 %r1, [test_cos_param_0]; 1758; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 1759; CHECK-NEXT: cvt.f32.f16 %f1, %rs2; 1760; CHECK-NEXT: cos.approx.f32 %f2, %f1; 1761; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %f2; 1762; CHECK-NEXT: cvt.f32.f16 %f3, %rs1; 1763; CHECK-NEXT: cos.approx.f32 %f4, %f3; 1764; CHECK-NEXT: cvt.rn.f16.f32 %rs4, %f4; 1765; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; 1766; CHECK-NEXT: st.param.b32 [func_retval0], %r2; 1767; CHECK-NEXT: ret; 1768 %r = call <2 x half> @llvm.cos.f16(<2 x half> %a) 1769 ret <2 x half> %r 1770} 1771 1772;;; Can't do this yet: requires libcall. 1773; XCHECK-LABEL: test_pow( 1774;define <2 x half> @test_pow(<2 x half> %a, <2 x half> %b) #0 { 1775; %r = call <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b) 1776; ret <2 x half> %r 1777;} 1778 1779;;; Can't do this yet: requires libcall. 1780; XCHECK-LABEL: test_exp( 1781;define <2 x half> @test_exp(<2 x half> %a) #0 { 1782; %r = call <2 x half> @llvm.exp.f16(<2 x half> %a) 1783; ret <2 x half> %r 1784;} 1785 1786;;; Can't do this yet: requires libcall. 1787; XCHECK-LABEL: test_exp2( 1788;define <2 x half> @test_exp2(<2 x half> %a) #0 { 1789; %r = call <2 x half> @llvm.exp2.f16(<2 x half> %a) 1790; ret <2 x half> %r 1791;} 1792 1793;;; Can't do this yet: requires libcall. 1794; XCHECK-LABEL: test_log( 1795;define <2 x half> @test_log(<2 x half> %a) #0 { 1796; %r = call <2 x half> @llvm.log.f16(<2 x half> %a) 1797; ret <2 x half> %r 1798;} 1799 1800;;; Can't do this yet: requires libcall. 1801; XCHECK-LABEL: test_log10( 1802;define <2 x half> @test_log10(<2 x half> %a) #0 { 1803; %r = call <2 x half> @llvm.log10.f16(<2 x half> %a) 1804; ret <2 x half> %r 1805;} 1806 1807;;; Can't do this yet: requires libcall. 1808; XCHECK-LABEL: test_log2( 1809;define <2 x half> @test_log2(<2 x half> %a) #0 { 1810; %r = call <2 x half> @llvm.log2.f16(<2 x half> %a) 1811; ret <2 x half> %r 1812;} 1813 1814 1815define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 { 1816; CHECK-F16-LABEL: test_fma( 1817; CHECK-F16: { 1818; CHECK-F16-NEXT: .reg .b32 %r<5>; 1819; CHECK-F16-EMPTY: 1820; CHECK-F16-NEXT: // %bb.0: 1821; CHECK-F16-NEXT: ld.param.b32 %r3, [test_fma_param_2]; 1822; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fma_param_1]; 1823; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fma_param_0]; 1824; CHECK-F16-NEXT: fma.rn.f16x2 %r4, %r1, %r2, %r3; 1825; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r4; 1826; CHECK-F16-NEXT: ret; 1827; 1828; CHECK-NOF16-LABEL: test_fma( 1829; CHECK-NOF16: { 1830; CHECK-NOF16-NEXT: .reg .b16 %rs<9>; 1831; CHECK-NOF16-NEXT: .reg .b32 %r<5>; 1832; CHECK-NOF16-NEXT: .reg .f32 %f<9>; 1833; CHECK-NOF16-EMPTY: 1834; CHECK-NOF16-NEXT: // %bb.0: 1835; CHECK-NOF16-NEXT: ld.param.b32 %r3, [test_fma_param_2]; 1836; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fma_param_1]; 1837; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fma_param_0]; 1838; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r3; 1839; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1840; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r2; 1841; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 1842; CHECK-NOF16-NEXT: mov.b32 {%rs5, %rs6}, %r1; 1843; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs6; 1844; CHECK-NOF16-NEXT: fma.rn.f32 %f4, %f3, %f2, %f1; 1845; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs7, %f4; 1846; CHECK-NOF16-NEXT: cvt.f32.f16 %f5, %rs1; 1847; CHECK-NOF16-NEXT: cvt.f32.f16 %f6, %rs3; 1848; CHECK-NOF16-NEXT: cvt.f32.f16 %f7, %rs5; 1849; CHECK-NOF16-NEXT: fma.rn.f32 %f8, %f7, %f6, %f5; 1850; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs8, %f8; 1851; CHECK-NOF16-NEXT: mov.b32 %r4, {%rs8, %rs7}; 1852; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4; 1853; CHECK-NOF16-NEXT: ret; 1854 %r = call <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) 1855 ret <2 x half> %r 1856} 1857 1858define <2 x half> @test_fabs(<2 x half> %a) #0 { 1859; CHECK-F16-LABEL: test_fabs( 1860; CHECK-F16: { 1861; CHECK-F16-NEXT: .reg .b32 %r<3>; 1862; CHECK-F16-EMPTY: 1863; CHECK-F16-NEXT: // %bb.0: 1864; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fabs_param_0]; 1865; CHECK-F16-NEXT: and.b32 %r2, %r1, 2147450879; 1866; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r2; 1867; CHECK-F16-NEXT: ret; 1868; 1869; CHECK-NOF16-LABEL: test_fabs( 1870; CHECK-NOF16: { 1871; CHECK-NOF16-NEXT: .reg .b16 %rs<5>; 1872; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 1873; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 1874; CHECK-NOF16-EMPTY: 1875; CHECK-NOF16-NEXT: // %bb.0: 1876; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fabs_param_0]; 1877; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; 1878; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1879; CHECK-NOF16-NEXT: abs.f32 %f2, %f1; 1880; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %f2; 1881; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs1; 1882; CHECK-NOF16-NEXT: abs.f32 %f4, %f3; 1883; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs4, %f4; 1884; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs4, %rs3}; 1885; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r2; 1886; CHECK-NOF16-NEXT: ret; 1887 %r = call <2 x half> @llvm.fabs.f16(<2 x half> %a) 1888 ret <2 x half> %r 1889} 1890 1891define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 { 1892; CHECK-LABEL: test_minnum( 1893; CHECK: { 1894; CHECK-NEXT: .reg .b16 %rs<7>; 1895; CHECK-NEXT: .reg .b32 %r<4>; 1896; CHECK-NEXT: .reg .f32 %f<7>; 1897; CHECK-EMPTY: 1898; CHECK-NEXT: // %bb.0: 1899; CHECK-NEXT: ld.param.b32 %r2, [test_minnum_param_1]; 1900; CHECK-NEXT: ld.param.b32 %r1, [test_minnum_param_0]; 1901; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; 1902; CHECK-NEXT: cvt.f32.f16 %f1, %rs2; 1903; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1; 1904; CHECK-NEXT: cvt.f32.f16 %f2, %rs4; 1905; CHECK-NEXT: min.f32 %f3, %f2, %f1; 1906; CHECK-NEXT: cvt.rn.f16.f32 %rs5, %f3; 1907; CHECK-NEXT: cvt.f32.f16 %f4, %rs1; 1908; CHECK-NEXT: cvt.f32.f16 %f5, %rs3; 1909; CHECK-NEXT: min.f32 %f6, %f5, %f4; 1910; CHECK-NEXT: cvt.rn.f16.f32 %rs6, %f6; 1911; CHECK-NEXT: mov.b32 %r3, {%rs6, %rs5}; 1912; CHECK-NEXT: st.param.b32 [func_retval0], %r3; 1913; CHECK-NEXT: ret; 1914 %r = call <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b) 1915 ret <2 x half> %r 1916} 1917 1918define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 { 1919; CHECK-LABEL: test_maxnum( 1920; CHECK: { 1921; CHECK-NEXT: .reg .b16 %rs<7>; 1922; CHECK-NEXT: .reg .b32 %r<4>; 1923; CHECK-NEXT: .reg .f32 %f<7>; 1924; CHECK-EMPTY: 1925; CHECK-NEXT: // %bb.0: 1926; CHECK-NEXT: ld.param.b32 %r2, [test_maxnum_param_1]; 1927; CHECK-NEXT: ld.param.b32 %r1, [test_maxnum_param_0]; 1928; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; 1929; CHECK-NEXT: cvt.f32.f16 %f1, %rs2; 1930; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1; 1931; CHECK-NEXT: cvt.f32.f16 %f2, %rs4; 1932; CHECK-NEXT: max.f32 %f3, %f2, %f1; 1933; CHECK-NEXT: cvt.rn.f16.f32 %rs5, %f3; 1934; CHECK-NEXT: cvt.f32.f16 %f4, %rs1; 1935; CHECK-NEXT: cvt.f32.f16 %f5, %rs3; 1936; CHECK-NEXT: max.f32 %f6, %f5, %f4; 1937; CHECK-NEXT: cvt.rn.f16.f32 %rs6, %f6; 1938; CHECK-NEXT: mov.b32 %r3, {%rs6, %rs5}; 1939; CHECK-NEXT: st.param.b32 [func_retval0], %r3; 1940; CHECK-NEXT: ret; 1941 %r = call <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b) 1942 ret <2 x half> %r 1943} 1944 1945define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 { 1946; CHECK-F16-LABEL: test_copysign( 1947; CHECK-F16: { 1948; CHECK-F16-NEXT: .reg .b32 %r<6>; 1949; CHECK-F16-EMPTY: 1950; CHECK-F16-NEXT: // %bb.0: 1951; CHECK-F16-NEXT: ld.param.b32 %r2, [test_copysign_param_1]; 1952; CHECK-F16-NEXT: ld.param.b32 %r1, [test_copysign_param_0]; 1953; CHECK-F16-NEXT: and.b32 %r3, %r2, -2147450880; 1954; CHECK-F16-NEXT: and.b32 %r4, %r1, 2147450879; 1955; CHECK-F16-NEXT: or.b32 %r5, %r4, %r3; 1956; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r5; 1957; CHECK-F16-NEXT: ret; 1958; 1959; CHECK-NOF16-LABEL: test_copysign( 1960; CHECK-NOF16: { 1961; CHECK-NOF16-NEXT: .reg .b16 %rs<11>; 1962; CHECK-NOF16-NEXT: .reg .b32 %r<4>; 1963; CHECK-NOF16-EMPTY: 1964; CHECK-NOF16-NEXT: // %bb.0: 1965; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_copysign_param_1]; 1966; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_copysign_param_0]; 1967; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 1968; CHECK-NOF16-NEXT: and.b16 %rs3, %rs2, -32768; 1969; CHECK-NOF16-NEXT: mov.b32 {%rs4, %rs5}, %r1; 1970; CHECK-NOF16-NEXT: and.b16 %rs6, %rs5, 32767; 1971; CHECK-NOF16-NEXT: or.b16 %rs7, %rs6, %rs3; 1972; CHECK-NOF16-NEXT: and.b16 %rs8, %rs1, -32768; 1973; CHECK-NOF16-NEXT: and.b16 %rs9, %rs4, 32767; 1974; CHECK-NOF16-NEXT: or.b16 %rs10, %rs9, %rs8; 1975; CHECK-NOF16-NEXT: mov.b32 %r3, {%rs10, %rs7}; 1976; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r3; 1977; CHECK-NOF16-NEXT: ret; 1978 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) 1979 ret <2 x half> %r 1980} 1981 1982define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 { 1983; CHECK-F16-LABEL: test_copysign_f32( 1984; CHECK-F16: { 1985; CHECK-F16-NEXT: .reg .b16 %rs<3>; 1986; CHECK-F16-NEXT: .reg .b32 %r<6>; 1987; CHECK-F16-NEXT: .reg .f32 %f<3>; 1988; CHECK-F16-EMPTY: 1989; CHECK-F16-NEXT: // %bb.0: 1990; CHECK-F16-NEXT: ld.param.v2.f32 {%f1, %f2}, [test_copysign_f32_param_1]; 1991; CHECK-F16-NEXT: ld.param.b32 %r1, [test_copysign_f32_param_0]; 1992; CHECK-F16-NEXT: cvt.rn.f16.f32 %rs1, %f2; 1993; CHECK-F16-NEXT: cvt.rn.f16.f32 %rs2, %f1; 1994; CHECK-F16-NEXT: mov.b32 %r2, {%rs2, %rs1}; 1995; CHECK-F16-NEXT: and.b32 %r3, %r2, -2147450880; 1996; CHECK-F16-NEXT: and.b32 %r4, %r1, 2147450879; 1997; CHECK-F16-NEXT: or.b32 %r5, %r4, %r3; 1998; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r5; 1999; CHECK-F16-NEXT: ret; 2000; 2001; CHECK-NOF16-LABEL: test_copysign_f32( 2002; CHECK-NOF16: { 2003; CHECK-NOF16-NEXT: .reg .b16 %rs<9>; 2004; CHECK-NOF16-NEXT: .reg .b32 %r<7>; 2005; CHECK-NOF16-NEXT: .reg .f32 %f<3>; 2006; CHECK-NOF16-EMPTY: 2007; CHECK-NOF16-NEXT: // %bb.0: 2008; CHECK-NOF16-NEXT: ld.param.v2.f32 {%f1, %f2}, [test_copysign_f32_param_1]; 2009; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_copysign_f32_param_0]; 2010; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; 2011; CHECK-NOF16-NEXT: and.b16 %rs3, %rs2, 32767; 2012; CHECK-NOF16-NEXT: mov.b32 %r2, %f2; 2013; CHECK-NOF16-NEXT: and.b32 %r3, %r2, -2147483648; 2014; CHECK-NOF16-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs4}, %r3; } 2015; CHECK-NOF16-NEXT: or.b16 %rs5, %rs3, %rs4; 2016; CHECK-NOF16-NEXT: and.b16 %rs6, %rs1, 32767; 2017; CHECK-NOF16-NEXT: mov.b32 %r4, %f1; 2018; CHECK-NOF16-NEXT: and.b32 %r5, %r4, -2147483648; 2019; CHECK-NOF16-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r5; } 2020; CHECK-NOF16-NEXT: or.b16 %rs8, %rs6, %rs7; 2021; CHECK-NOF16-NEXT: mov.b32 %r6, {%rs8, %rs5}; 2022; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r6; 2023; CHECK-NOF16-NEXT: ret; 2024 %tb = fptrunc <2 x float> %b to <2 x half> 2025 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb) 2026 ret <2 x half> %r 2027} 2028 2029define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 { 2030; CHECK-F16-LABEL: test_copysign_f64( 2031; CHECK-F16: { 2032; CHECK-F16-NEXT: .reg .b16 %rs<3>; 2033; CHECK-F16-NEXT: .reg .b32 %r<6>; 2034; CHECK-F16-NEXT: .reg .f64 %fd<3>; 2035; CHECK-F16-EMPTY: 2036; CHECK-F16-NEXT: // %bb.0: 2037; CHECK-F16-NEXT: ld.param.v2.f64 {%fd1, %fd2}, [test_copysign_f64_param_1]; 2038; CHECK-F16-NEXT: ld.param.b32 %r1, [test_copysign_f64_param_0]; 2039; CHECK-F16-NEXT: cvt.rn.f16.f64 %rs1, %fd2; 2040; CHECK-F16-NEXT: cvt.rn.f16.f64 %rs2, %fd1; 2041; CHECK-F16-NEXT: mov.b32 %r2, {%rs2, %rs1}; 2042; CHECK-F16-NEXT: and.b32 %r3, %r2, -2147450880; 2043; CHECK-F16-NEXT: and.b32 %r4, %r1, 2147450879; 2044; CHECK-F16-NEXT: or.b32 %r5, %r4, %r3; 2045; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r5; 2046; CHECK-F16-NEXT: ret; 2047; 2048; CHECK-NOF16-LABEL: test_copysign_f64( 2049; CHECK-NOF16: { 2050; CHECK-NOF16-NEXT: .reg .b16 %rs<9>; 2051; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 2052; CHECK-NOF16-NEXT: .reg .b64 %rd<7>; 2053; CHECK-NOF16-NEXT: .reg .f64 %fd<3>; 2054; CHECK-NOF16-EMPTY: 2055; CHECK-NOF16-NEXT: // %bb.0: 2056; CHECK-NOF16-NEXT: ld.param.v2.f64 {%fd1, %fd2}, [test_copysign_f64_param_1]; 2057; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_copysign_f64_param_0]; 2058; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; 2059; CHECK-NOF16-NEXT: and.b16 %rs3, %rs2, 32767; 2060; CHECK-NOF16-NEXT: mov.b64 %rd1, %fd2; 2061; CHECK-NOF16-NEXT: and.b64 %rd2, %rd1, -9223372036854775808; 2062; CHECK-NOF16-NEXT: shr.u64 %rd3, %rd2, 48; 2063; CHECK-NOF16-NEXT: cvt.u16.u64 %rs4, %rd3; 2064; CHECK-NOF16-NEXT: or.b16 %rs5, %rs3, %rs4; 2065; CHECK-NOF16-NEXT: and.b16 %rs6, %rs1, 32767; 2066; CHECK-NOF16-NEXT: mov.b64 %rd4, %fd1; 2067; CHECK-NOF16-NEXT: and.b64 %rd5, %rd4, -9223372036854775808; 2068; CHECK-NOF16-NEXT: shr.u64 %rd6, %rd5, 48; 2069; CHECK-NOF16-NEXT: cvt.u16.u64 %rs7, %rd6; 2070; CHECK-NOF16-NEXT: or.b16 %rs8, %rs6, %rs7; 2071; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs8, %rs5}; 2072; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r2; 2073; CHECK-NOF16-NEXT: ret; 2074 %tb = fptrunc <2 x double> %b to <2 x half> 2075 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb) 2076 ret <2 x half> %r 2077} 2078 2079define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 { 2080; CHECK-F16-LABEL: test_copysign_extended( 2081; CHECK-F16: { 2082; CHECK-F16-NEXT: .reg .b16 %rs<3>; 2083; CHECK-F16-NEXT: .reg .b32 %r<6>; 2084; CHECK-F16-NEXT: .reg .f32 %f<3>; 2085; CHECK-F16-EMPTY: 2086; CHECK-F16-NEXT: // %bb.0: 2087; CHECK-F16-NEXT: ld.param.b32 %r2, [test_copysign_extended_param_1]; 2088; CHECK-F16-NEXT: ld.param.b32 %r1, [test_copysign_extended_param_0]; 2089; CHECK-F16-NEXT: and.b32 %r3, %r2, -2147450880; 2090; CHECK-F16-NEXT: and.b32 %r4, %r1, 2147450879; 2091; CHECK-F16-NEXT: or.b32 %r5, %r4, %r3; 2092; CHECK-F16-NEXT: mov.b32 {%rs1, %rs2}, %r5; 2093; CHECK-F16-NEXT: cvt.f32.f16 %f1, %rs2; 2094; CHECK-F16-NEXT: cvt.f32.f16 %f2, %rs1; 2095; CHECK-F16-NEXT: st.param.v2.f32 [func_retval0], {%f2, %f1}; 2096; CHECK-F16-NEXT: ret; 2097; 2098; CHECK-NOF16-LABEL: test_copysign_extended( 2099; CHECK-NOF16: { 2100; CHECK-NOF16-NEXT: .reg .b16 %rs<11>; 2101; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 2102; CHECK-NOF16-NEXT: .reg .f32 %f<3>; 2103; CHECK-NOF16-EMPTY: 2104; CHECK-NOF16-NEXT: // %bb.0: 2105; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_copysign_extended_param_1]; 2106; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_copysign_extended_param_0]; 2107; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 2108; CHECK-NOF16-NEXT: and.b16 %rs3, %rs1, -32768; 2109; CHECK-NOF16-NEXT: mov.b32 {%rs4, %rs5}, %r1; 2110; CHECK-NOF16-NEXT: and.b16 %rs6, %rs4, 32767; 2111; CHECK-NOF16-NEXT: or.b16 %rs7, %rs6, %rs3; 2112; CHECK-NOF16-NEXT: and.b16 %rs8, %rs2, -32768; 2113; CHECK-NOF16-NEXT: and.b16 %rs9, %rs5, 32767; 2114; CHECK-NOF16-NEXT: or.b16 %rs10, %rs9, %rs8; 2115; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs10; 2116; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs7; 2117; CHECK-NOF16-NEXT: st.param.v2.f32 [func_retval0], {%f2, %f1}; 2118; CHECK-NOF16-NEXT: ret; 2119 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) 2120 %xr = fpext <2 x half> %r to <2 x float> 2121 ret <2 x float> %xr 2122} 2123 2124define <2 x half> @test_floor(<2 x half> %a) #0 { 2125; CHECK-LABEL: test_floor( 2126; CHECK: { 2127; CHECK-NEXT: .reg .b16 %rs<5>; 2128; CHECK-NEXT: .reg .b32 %r<3>; 2129; CHECK-EMPTY: 2130; CHECK-NEXT: // %bb.0: 2131; CHECK-NEXT: ld.param.b32 %r1, [test_floor_param_0]; 2132; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 2133; CHECK-NEXT: cvt.rmi.f16.f16 %rs3, %rs2; 2134; CHECK-NEXT: cvt.rmi.f16.f16 %rs4, %rs1; 2135; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; 2136; CHECK-NEXT: st.param.b32 [func_retval0], %r2; 2137; CHECK-NEXT: ret; 2138 %r = call <2 x half> @llvm.floor.f16(<2 x half> %a) 2139 ret <2 x half> %r 2140} 2141 2142define <2 x half> @test_ceil(<2 x half> %a) #0 { 2143; CHECK-LABEL: test_ceil( 2144; CHECK: { 2145; CHECK-NEXT: .reg .b16 %rs<5>; 2146; CHECK-NEXT: .reg .b32 %r<3>; 2147; CHECK-EMPTY: 2148; CHECK-NEXT: // %bb.0: 2149; CHECK-NEXT: ld.param.b32 %r1, [test_ceil_param_0]; 2150; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 2151; CHECK-NEXT: cvt.rpi.f16.f16 %rs3, %rs2; 2152; CHECK-NEXT: cvt.rpi.f16.f16 %rs4, %rs1; 2153; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; 2154; CHECK-NEXT: st.param.b32 [func_retval0], %r2; 2155; CHECK-NEXT: ret; 2156 %r = call <2 x half> @llvm.ceil.f16(<2 x half> %a) 2157 ret <2 x half> %r 2158} 2159 2160define <2 x half> @test_trunc(<2 x half> %a) #0 { 2161; CHECK-LABEL: test_trunc( 2162; CHECK: { 2163; CHECK-NEXT: .reg .b16 %rs<5>; 2164; CHECK-NEXT: .reg .b32 %r<3>; 2165; CHECK-EMPTY: 2166; CHECK-NEXT: // %bb.0: 2167; CHECK-NEXT: ld.param.b32 %r1, [test_trunc_param_0]; 2168; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 2169; CHECK-NEXT: cvt.rzi.f16.f16 %rs3, %rs2; 2170; CHECK-NEXT: cvt.rzi.f16.f16 %rs4, %rs1; 2171; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; 2172; CHECK-NEXT: st.param.b32 [func_retval0], %r2; 2173; CHECK-NEXT: ret; 2174 %r = call <2 x half> @llvm.trunc.f16(<2 x half> %a) 2175 ret <2 x half> %r 2176} 2177 2178define <2 x half> @test_rint(<2 x half> %a) #0 { 2179; CHECK-LABEL: test_rint( 2180; CHECK: { 2181; CHECK-NEXT: .reg .b16 %rs<5>; 2182; CHECK-NEXT: .reg .b32 %r<3>; 2183; CHECK-EMPTY: 2184; CHECK-NEXT: // %bb.0: 2185; CHECK-NEXT: ld.param.b32 %r1, [test_rint_param_0]; 2186; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 2187; CHECK-NEXT: cvt.rni.f16.f16 %rs3, %rs2; 2188; CHECK-NEXT: cvt.rni.f16.f16 %rs4, %rs1; 2189; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; 2190; CHECK-NEXT: st.param.b32 [func_retval0], %r2; 2191; CHECK-NEXT: ret; 2192 %r = call <2 x half> @llvm.rint.f16(<2 x half> %a) 2193 ret <2 x half> %r 2194} 2195 2196define <2 x half> @test_nearbyint(<2 x half> %a) #0 { 2197; CHECK-LABEL: test_nearbyint( 2198; CHECK: { 2199; CHECK-NEXT: .reg .b16 %rs<5>; 2200; CHECK-NEXT: .reg .b32 %r<3>; 2201; CHECK-EMPTY: 2202; CHECK-NEXT: // %bb.0: 2203; CHECK-NEXT: ld.param.b32 %r1, [test_nearbyint_param_0]; 2204; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 2205; CHECK-NEXT: cvt.rni.f16.f16 %rs3, %rs2; 2206; CHECK-NEXT: cvt.rni.f16.f16 %rs4, %rs1; 2207; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; 2208; CHECK-NEXT: st.param.b32 [func_retval0], %r2; 2209; CHECK-NEXT: ret; 2210 %r = call <2 x half> @llvm.nearbyint.f16(<2 x half> %a) 2211 ret <2 x half> %r 2212} 2213 2214define <2 x half> @test_roundeven(<2 x half> %a) #0 { 2215; CHECK-LABEL: test_roundeven( 2216; CHECK: { 2217; CHECK-NEXT: .reg .b16 %rs<5>; 2218; CHECK-NEXT: .reg .b32 %r<3>; 2219; CHECK-EMPTY: 2220; CHECK-NEXT: // %bb.0: 2221; CHECK-NEXT: ld.param.b32 %r1, [test_roundeven_param_0]; 2222; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 2223; CHECK-NEXT: cvt.rni.f16.f16 %rs3, %rs2; 2224; CHECK-NEXT: cvt.rni.f16.f16 %rs4, %rs1; 2225; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; 2226; CHECK-NEXT: st.param.b32 [func_retval0], %r2; 2227; CHECK-NEXT: ret; 2228 %r = call <2 x half> @llvm.roundeven.f16(<2 x half> %a) 2229 ret <2 x half> %r 2230} 2231 2232; check the use of sign mask and 0.5 to implement round 2233define <2 x half> @test_round(<2 x half> %a) #0 { 2234; CHECK-LABEL: test_round( 2235; CHECK: { 2236; CHECK-NEXT: .reg .pred %p<5>; 2237; CHECK-NEXT: .reg .b16 %rs<5>; 2238; CHECK-NEXT: .reg .b32 %r<9>; 2239; CHECK-NEXT: .reg .f32 %f<17>; 2240; CHECK-EMPTY: 2241; CHECK-NEXT: // %bb.0: 2242; CHECK-NEXT: ld.param.b32 %r1, [test_round_param_0]; 2243; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 2244; CHECK-NEXT: cvt.f32.f16 %f1, %rs2; 2245; CHECK-NEXT: mov.b32 %r2, %f1; 2246; CHECK-NEXT: and.b32 %r3, %r2, -2147483648; 2247; CHECK-NEXT: or.b32 %r4, %r3, 1056964608; 2248; CHECK-NEXT: mov.b32 %f2, %r4; 2249; CHECK-NEXT: add.rn.f32 %f3, %f1, %f2; 2250; CHECK-NEXT: cvt.rzi.f32.f32 %f4, %f3; 2251; CHECK-NEXT: abs.f32 %f5, %f1; 2252; CHECK-NEXT: setp.gt.f32 %p1, %f5, 0f4B000000; 2253; CHECK-NEXT: selp.f32 %f6, %f1, %f4, %p1; 2254; CHECK-NEXT: cvt.rzi.f32.f32 %f7, %f1; 2255; CHECK-NEXT: setp.lt.f32 %p2, %f5, 0f3F000000; 2256; CHECK-NEXT: selp.f32 %f8, %f7, %f6, %p2; 2257; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %f8; 2258; CHECK-NEXT: cvt.f32.f16 %f9, %rs1; 2259; CHECK-NEXT: mov.b32 %r5, %f9; 2260; CHECK-NEXT: and.b32 %r6, %r5, -2147483648; 2261; CHECK-NEXT: or.b32 %r7, %r6, 1056964608; 2262; CHECK-NEXT: mov.b32 %f10, %r7; 2263; CHECK-NEXT: add.rn.f32 %f11, %f9, %f10; 2264; CHECK-NEXT: cvt.rzi.f32.f32 %f12, %f11; 2265; CHECK-NEXT: abs.f32 %f13, %f9; 2266; CHECK-NEXT: setp.gt.f32 %p3, %f13, 0f4B000000; 2267; CHECK-NEXT: selp.f32 %f14, %f9, %f12, %p3; 2268; CHECK-NEXT: cvt.rzi.f32.f32 %f15, %f9; 2269; CHECK-NEXT: setp.lt.f32 %p4, %f13, 0f3F000000; 2270; CHECK-NEXT: selp.f32 %f16, %f15, %f14, %p4; 2271; CHECK-NEXT: cvt.rn.f16.f32 %rs4, %f16; 2272; CHECK-NEXT: mov.b32 %r8, {%rs4, %rs3}; 2273; CHECK-NEXT: st.param.b32 [func_retval0], %r8; 2274; CHECK-NEXT: ret; 2275 %r = call <2 x half> @llvm.round.f16(<2 x half> %a) 2276 ret <2 x half> %r 2277} 2278 2279define <2 x half> @test_fmuladd(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 { 2280; CHECK-F16-LABEL: test_fmuladd( 2281; CHECK-F16: { 2282; CHECK-F16-NEXT: .reg .b32 %r<5>; 2283; CHECK-F16-EMPTY: 2284; CHECK-F16-NEXT: // %bb.0: 2285; CHECK-F16-NEXT: ld.param.b32 %r3, [test_fmuladd_param_2]; 2286; CHECK-F16-NEXT: ld.param.b32 %r2, [test_fmuladd_param_1]; 2287; CHECK-F16-NEXT: ld.param.b32 %r1, [test_fmuladd_param_0]; 2288; CHECK-F16-NEXT: fma.rn.f16x2 %r4, %r1, %r2, %r3; 2289; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r4; 2290; CHECK-F16-NEXT: ret; 2291; 2292; CHECK-NOF16-LABEL: test_fmuladd( 2293; CHECK-NOF16: { 2294; CHECK-NOF16-NEXT: .reg .b16 %rs<9>; 2295; CHECK-NOF16-NEXT: .reg .b32 %r<5>; 2296; CHECK-NOF16-NEXT: .reg .f32 %f<9>; 2297; CHECK-NOF16-EMPTY: 2298; CHECK-NOF16-NEXT: // %bb.0: 2299; CHECK-NOF16-NEXT: ld.param.b32 %r3, [test_fmuladd_param_2]; 2300; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fmuladd_param_1]; 2301; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fmuladd_param_0]; 2302; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r3; 2303; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 2304; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r2; 2305; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 2306; CHECK-NOF16-NEXT: mov.b32 {%rs5, %rs6}, %r1; 2307; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs6; 2308; CHECK-NOF16-NEXT: fma.rn.f32 %f4, %f3, %f2, %f1; 2309; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs7, %f4; 2310; CHECK-NOF16-NEXT: cvt.f32.f16 %f5, %rs1; 2311; CHECK-NOF16-NEXT: cvt.f32.f16 %f6, %rs3; 2312; CHECK-NOF16-NEXT: cvt.f32.f16 %f7, %rs5; 2313; CHECK-NOF16-NEXT: fma.rn.f32 %f8, %f7, %f6, %f5; 2314; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs8, %f8; 2315; CHECK-NOF16-NEXT: mov.b32 %r4, {%rs8, %rs7}; 2316; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4; 2317; CHECK-NOF16-NEXT: ret; 2318 %r = call <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) 2319 ret <2 x half> %r 2320} 2321 2322define <2 x half> @test_shufflevector(<2 x half> %a) #0 { 2323; CHECK-LABEL: test_shufflevector( 2324; CHECK: { 2325; CHECK-NEXT: .reg .b16 %rs<3>; 2326; CHECK-NEXT: .reg .b32 %r<3>; 2327; CHECK-EMPTY: 2328; CHECK-NEXT: // %bb.0: 2329; CHECK-NEXT: ld.param.b32 %r1, [test_shufflevector_param_0]; 2330; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 2331; CHECK-NEXT: mov.b32 %r2, {%rs2, %rs1}; 2332; CHECK-NEXT: st.param.b32 [func_retval0], %r2; 2333; CHECK-NEXT: ret; 2334 %s = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0> 2335 ret <2 x half> %s 2336} 2337 2338define <2 x half> @test_insertelement(<2 x half> %a, half %x) #0 { 2339; CHECK-LABEL: test_insertelement( 2340; CHECK: { 2341; CHECK-NEXT: .reg .b16 %rs<3>; 2342; CHECK-NEXT: .reg .b32 %r<3>; 2343; CHECK-EMPTY: 2344; CHECK-NEXT: // %bb.0: 2345; CHECK-NEXT: ld.param.b16 %rs1, [test_insertelement_param_1]; 2346; CHECK-NEXT: ld.param.b32 %r1, [test_insertelement_param_0]; 2347; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {%rs2, tmp}, %r1; } 2348; CHECK-NEXT: mov.b32 %r2, {%rs2, %rs1}; 2349; CHECK-NEXT: st.param.b32 [func_retval0], %r2; 2350; CHECK-NEXT: ret; 2351 %i = insertelement <2 x half> %a, half %x, i64 1 2352 ret <2 x half> %i 2353} 2354 2355define <2 x half> @test_sitofp_2xi16_to_2xhalf(<2 x i16> %a) #0 { 2356; CHECK-LABEL: test_sitofp_2xi16_to_2xhalf( 2357; CHECK: { 2358; CHECK-NEXT: .reg .b16 %rs<5>; 2359; CHECK-NEXT: .reg .b32 %r<3>; 2360; CHECK-EMPTY: 2361; CHECK-NEXT: // %bb.0: 2362; CHECK-NEXT: ld.param.u32 %r1, [test_sitofp_2xi16_to_2xhalf_param_0]; 2363; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 2364; CHECK-NEXT: cvt.rn.f16.s16 %rs3, %rs2; 2365; CHECK-NEXT: cvt.rn.f16.s16 %rs4, %rs1; 2366; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; 2367; CHECK-NEXT: st.param.b32 [func_retval0], %r2; 2368; CHECK-NEXT: ret; 2369 %r = sitofp <2 x i16> %a to <2 x half> 2370 ret <2 x half> %r 2371} 2372 2373define <2 x half> @test_uitofp_2xi16_to_2xhalf(<2 x i16> %a) #0 { 2374; CHECK-LABEL: test_uitofp_2xi16_to_2xhalf( 2375; CHECK: { 2376; CHECK-NEXT: .reg .b16 %rs<5>; 2377; CHECK-NEXT: .reg .b32 %r<3>; 2378; CHECK-EMPTY: 2379; CHECK-NEXT: // %bb.0: 2380; CHECK-NEXT: ld.param.u32 %r1, [test_uitofp_2xi16_to_2xhalf_param_0]; 2381; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; 2382; CHECK-NEXT: cvt.rn.f16.u16 %rs3, %rs2; 2383; CHECK-NEXT: cvt.rn.f16.u16 %rs4, %rs1; 2384; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; 2385; CHECK-NEXT: st.param.b32 [func_retval0], %r2; 2386; CHECK-NEXT: ret; 2387 %r = uitofp <2 x i16> %a to <2 x half> 2388 ret <2 x half> %r 2389} 2390 2391attributes #0 = { nounwind } 2392attributes #1 = { "unsafe-fp-math" = "true" } 2393