; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16
; RUN: llc < %s -mcpu=sm_80 -mattr +ptx70 | FileCheck %s --check-prefixes=CHECK,CHECK-F16
; RUN: llc < %s -mcpu=sm_80 -mattr +ptx70 --nvptx-no-f16-math | FileCheck %s --check-prefixes=CHECK,CHECK-SM80-NOF16
; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}
; RUN: %if ptxas-11.0 %{ llc < %s -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
; RUN: %if ptxas-11.0 %{ llc < %s -mcpu=sm_80 --nvptx-no-f16-math | %ptxas-verify -arch=sm_80 %}

target triple = "nvptx64-nvidia-cuda"

; Checks that llvm intrinsics for math functions are correctly lowered to PTX.
;
; Three llc configurations are compared above: the default target, sm_80 with
; ptx70, and sm_80 with ptx70 plus --nvptx-no-f16-math. Tests whose expected
; output is the same in all three share one set of assertions; tests whose
; output differs carry one set of assertions per configuration.

declare float @llvm.ceil.f32(float) #0
declare double @llvm.ceil.f64(double) #0
declare float @llvm.floor.f32(float) #0
declare double @llvm.floor.f64(double) #0
declare float @llvm.round.f32(float) #0
declare double @llvm.round.f64(double) #0
declare float @llvm.nearbyint.f32(float) #0
declare double @llvm.nearbyint.f64(double) #0
declare float @llvm.rint.f32(float) #0
declare double @llvm.rint.f64(double) #0
declare float @llvm.roundeven.f32(float) #0
declare double @llvm.roundeven.f64(double) #0
declare float @llvm.trunc.f32(float) #0
declare double @llvm.trunc.f64(double) #0
declare float @llvm.fabs.f32(float) #0
declare double @llvm.fabs.f64(double) #0
declare half @llvm.minnum.f16(half, half) #0
declare float @llvm.minnum.f32(float, float) #0
declare double @llvm.minnum.f64(double, double) #0
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #0
declare half @llvm.minimum.f16(half, half) #0
declare float @llvm.minimum.f32(float, float) #0
declare double @llvm.minimum.f64(double, double) #0
declare <2 x half> @llvm.minimum.v2f16(<2 x half>, <2 x half>) #0
declare half @llvm.maxnum.f16(half, half) #0
declare float @llvm.maxnum.f32(float, float) #0
declare double @llvm.maxnum.f64(double, double) #0
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #0
declare half @llvm.maximum.f16(half, half) #0
declare float @llvm.maximum.f32(float, float) #0
declare double @llvm.maximum.f64(double, double) #0
declare <2 x half> @llvm.maximum.v2f16(<2 x half>, <2 x half>) #0
declare float @llvm.fma.f32(float, float, float) #0
declare double @llvm.fma.f64(double, double, double) #0

; ---- ceil ----
; Expected lowering: a single cvt.rpi; the *_ftz variant expects the .ftz form.

define float @ceil_float(float %a) {
; CHECK-LABEL: ceil_float(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [ceil_float_param_0];
; CHECK-NEXT:    cvt.rpi.f32.f32 %f2, %f1;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.ceil.f32(float %a)
  ret float %b
}

define float @ceil_float_ftz(float %a) #1 {
; CHECK-LABEL: ceil_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [ceil_float_ftz_param_0];
; CHECK-NEXT:    cvt.rpi.ftz.f32.f32 %f2, %f1;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.ceil.f32(float %a)
  ret float %b
}

define double @ceil_double(double %a) {
; CHECK-LABEL: ceil_double(
; CHECK:       {
; CHECK-NEXT:    .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f64 %fd1, [ceil_double_param_0];
; CHECK-NEXT:    cvt.rpi.f64.f64 %fd2, %fd1;
; CHECK-NEXT:    st.param.f64 [func_retval0], %fd2;
; CHECK-NEXT:    ret;
  %b = call double @llvm.ceil.f64(double %a)
  ret double %b
}

; ---- floor ----
; Expected lowering: a single cvt.rmi; the *_ftz variant expects the .ftz form.

define float @floor_float(float %a) {
; CHECK-LABEL: floor_float(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [floor_float_param_0];
; CHECK-NEXT:    cvt.rmi.f32.f32 %f2, %f1;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.floor.f32(float %a)
  ret float %b
}

define float @floor_float_ftz(float %a) #1 {
; CHECK-LABEL: floor_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [floor_float_ftz_param_0];
; CHECK-NEXT:    cvt.rmi.ftz.f32.f32 %f2, %f1;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.floor.f32(float %a)
  ret float %b
}

define double @floor_double(double %a) {
; CHECK-LABEL: floor_double(
; CHECK:       {
; CHECK-NEXT:    .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f64 %fd1, [floor_double_param_0];
; CHECK-NEXT:    cvt.rmi.f64.f64 %fd2, %fd1;
; CHECK-NEXT:    st.param.f64 [func_retval0], %fd2;
; CHECK-NEXT:    ret;
  %b = call double @llvm.floor.f64(double %a)
  ret double %b
}

; ---- round ----
; Expected lowering: a multi-instruction sequence built from add.rn, cvt.rzi
; and setp/selp selections (see the per-function notes below).

define float @round_float(float %a) {
; check the use of sign mask and 0.5 to implement round
; CHECK-LABEL: round_float(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<3>;
; CHECK-NEXT:    .reg .b32 %r<4>;
; CHECK-NEXT:    .reg .f32 %f<9>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [round_float_param_0];
; CHECK-NEXT:    mov.b32 %r1, %f1;
; CHECK-NEXT:    and.b32 %r2, %r1, -2147483648;
; CHECK-NEXT:    or.b32 %r3, %r2, 1056964608;
; CHECK-NEXT:    mov.b32 %f2, %r3;
; CHECK-NEXT:    add.rn.f32 %f3, %f1, %f2;
; CHECK-NEXT:    cvt.rzi.f32.f32 %f4, %f3;
; CHECK-NEXT:    abs.f32 %f5, %f1;
; CHECK-NEXT:    setp.gt.f32 %p1, %f5, 0f4B000000;
; CHECK-NEXT:    selp.f32 %f6, %f1, %f4, %p1;
; CHECK-NEXT:    cvt.rzi.f32.f32 %f7, %f1;
; CHECK-NEXT:    setp.lt.f32 %p2, %f5, 0f3F000000;
; CHECK-NEXT:    selp.f32 %f8, %f7, %f6, %p2;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f8;
; CHECK-NEXT:    ret;
  %b = call float @llvm.round.f32(float %a)
  ret float %b
}

define float @round_float_ftz(float %a) #1 {
; check the use of sign mask and 0.5 to implement round
; CHECK-LABEL: round_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<3>;
; CHECK-NEXT:    .reg .b32 %r<4>;
; CHECK-NEXT:    .reg .f32 %f<9>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [round_float_ftz_param_0];
; CHECK-NEXT:    mov.b32 %r1, %f1;
; CHECK-NEXT:    and.b32 %r2, %r1, -2147483648;
; CHECK-NEXT:    or.b32 %r3, %r2, 1056964608;
; CHECK-NEXT:    mov.b32 %f2, %r3;
; CHECK-NEXT:    add.rn.ftz.f32 %f3, %f1, %f2;
; CHECK-NEXT:    cvt.rzi.ftz.f32.f32 %f4, %f3;
; CHECK-NEXT:    abs.ftz.f32 %f5, %f1;
; CHECK-NEXT:    setp.gt.ftz.f32 %p1, %f5, 0f4B000000;
; CHECK-NEXT:    selp.f32 %f6, %f1, %f4, %p1;
; CHECK-NEXT:    cvt.rzi.ftz.f32.f32 %f7, %f1;
; CHECK-NEXT:    setp.lt.ftz.f32 %p2, %f5, 0f3F000000;
; CHECK-NEXT:    selp.f32 %f8, %f7, %f6, %p2;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f8;
; CHECK-NEXT:    ret;
  %b = call float @llvm.round.f32(float %a)
  ret float %b
}

define double @round_double(double %a) {
; check the use of 0.5 to implement round
; CHECK-LABEL: round_double(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<3>;
; CHECK-NEXT:    .reg .f64 %fd<8>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f64 %fd1, [round_double_param_0];
; CHECK-NEXT:    abs.f64 %fd2, %fd1;
; CHECK-NEXT:    setp.lt.f64 %p1, %fd2, 0d3FE0000000000000;
; CHECK-NEXT:    add.rn.f64 %fd3, %fd2, 0d3FE0000000000000;
; CHECK-NEXT:    cvt.rzi.f64.f64 %fd4, %fd3;
; CHECK-NEXT:    selp.f64 %fd5, 0d0000000000000000, %fd4, %p1;
; CHECK-NEXT:    copysign.f64 %fd6, %fd1, %fd5;
; CHECK-NEXT:    setp.gt.f64 %p2, %fd2, 0d4330000000000000;
; CHECK-NEXT:    selp.f64 %fd7, %fd1, %fd6, %p2;
; CHECK-NEXT:    st.param.f64 [func_retval0], %fd7;
; CHECK-NEXT:    ret;
  %b = call double @llvm.round.f64(double %a)
  ret double %b
}

; ---- nearbyint ----
; Expected lowering: a single cvt.rni; the *_ftz variant expects the .ftz form.

define float @nearbyint_float(float %a) {
; CHECK-LABEL: nearbyint_float(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [nearbyint_float_param_0];
; CHECK-NEXT:    cvt.rni.f32.f32 %f2, %f1;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.nearbyint.f32(float %a)
  ret float %b
}

define float @nearbyint_float_ftz(float %a) #1 {
; CHECK-LABEL: nearbyint_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [nearbyint_float_ftz_param_0];
; CHECK-NEXT:    cvt.rni.ftz.f32.f32 %f2, %f1;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.nearbyint.f32(float %a)
  ret float %b
}

define double @nearbyint_double(double %a) {
; CHECK-LABEL: nearbyint_double(
; CHECK:       {
; CHECK-NEXT:    .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f64 %fd1, [nearbyint_double_param_0];
; CHECK-NEXT:    cvt.rni.f64.f64 %fd2, %fd1;
; CHECK-NEXT:    st.param.f64 [func_retval0], %fd2;
; CHECK-NEXT:    ret;
  %b = call double @llvm.nearbyint.f64(double %a)
  ret double %b
}

; ---- rint ----
; Expected lowering: a single cvt.rni; the *_ftz variant expects the .ftz form.

define float @rint_float(float %a) {
; CHECK-LABEL: rint_float(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [rint_float_param_0];
; CHECK-NEXT:    cvt.rni.f32.f32 %f2, %f1;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.rint.f32(float %a)
  ret float %b
}

define float @rint_float_ftz(float %a) #1 {
; CHECK-LABEL: rint_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [rint_float_ftz_param_0];
; CHECK-NEXT:    cvt.rni.ftz.f32.f32 %f2, %f1;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.rint.f32(float %a)
  ret float %b
}

define double @rint_double(double %a) {
; CHECK-LABEL: rint_double(
; CHECK:       {
; CHECK-NEXT:    .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f64 %fd1, [rint_double_param_0];
; CHECK-NEXT:    cvt.rni.f64.f64 %fd2, %fd1;
; CHECK-NEXT:    st.param.f64 [func_retval0], %fd2;
; CHECK-NEXT:    ret;
  %b = call double @llvm.rint.f64(double %a)
  ret double %b
}

; ---- roundeven ----
; Expected lowering: a single cvt.rni; the *_ftz variant expects the .ftz form.

define float @roundeven_float(float %a) {
; CHECK-LABEL: roundeven_float(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [roundeven_float_param_0];
; CHECK-NEXT:    cvt.rni.f32.f32 %f2, %f1;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.roundeven.f32(float %a)
  ret float %b
}

define float @roundeven_float_ftz(float %a) #1 {
; CHECK-LABEL: roundeven_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [roundeven_float_ftz_param_0];
; CHECK-NEXT:    cvt.rni.ftz.f32.f32 %f2, %f1;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.roundeven.f32(float %a)
  ret float %b
}

define double @roundeven_double(double %a) {
; CHECK-LABEL: roundeven_double(
; CHECK:       {
; CHECK-NEXT:    .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f64 %fd1, [roundeven_double_param_0];
; CHECK-NEXT:    cvt.rni.f64.f64 %fd2, %fd1;
; CHECK-NEXT:    st.param.f64 [func_retval0], %fd2;
; CHECK-NEXT:    ret;
  %b = call double @llvm.roundeven.f64(double %a)
  ret double %b
}

; ---- trunc ----
; Expected lowering: a single cvt.rzi; the *_ftz variant expects the .ftz form.

define float @trunc_float(float %a) {
; CHECK-LABEL: trunc_float(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [trunc_float_param_0];
; CHECK-NEXT:    cvt.rzi.f32.f32 %f2, %f1;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.trunc.f32(float %a)
  ret float %b
}

define float @trunc_float_ftz(float %a) #1 {
; CHECK-LABEL: trunc_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [trunc_float_ftz_param_0];
; CHECK-NEXT:    cvt.rzi.ftz.f32.f32 %f2, %f1;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.trunc.f32(float %a)
  ret float %b
}

define double @trunc_double(double %a) {
; CHECK-LABEL: trunc_double(
; CHECK:       {
; CHECK-NEXT:    .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f64 %fd1, [trunc_double_param_0];
; CHECK-NEXT:    cvt.rzi.f64.f64 %fd2, %fd1;
; CHECK-NEXT:    st.param.f64 [func_retval0], %fd2;
; CHECK-NEXT:    ret;
  %b = call double @llvm.trunc.f64(double %a)
  ret double %b
}

; ---- abs ----
; Exercises llvm.fabs; expected lowering is a single abs instruction, with the
; .ftz form for the *_ftz variant.

define float @abs_float(float %a) {
; CHECK-LABEL: abs_float(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [abs_float_param_0];
; CHECK-NEXT:    abs.f32 %f2, %f1;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.fabs.f32(float %a)
  ret float %b
}

define float @abs_float_ftz(float %a) #1 {
; CHECK-LABEL: abs_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [abs_float_ftz_param_0];
; CHECK-NEXT:    abs.ftz.f32 %f2, %f1;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.fabs.f32(float %a)
  ret float %b
}

define double @abs_double(double %a) {
; CHECK-LABEL: abs_double(
; CHECK:       {
; CHECK-NEXT:    .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f64 %fd1, [abs_double_param_0];
; CHECK-NEXT:    abs.f64 %fd2, %fd1;
; CHECK-NEXT:    st.param.f64 [func_retval0], %fd2;
; CHECK-NEXT:    ret;
  %b = call double @llvm.fabs.f64(double %a)
  ret double %b
}

; ---- minnum ----
; f32/f64 expect a single min instruction in every configuration. The half and
; <2 x half> tests expect min.f16 / min.f16x2 only in the sm_80 + ptx70 run
; with f16 math enabled; the other two runs expect conversion to f32, min.f32,
; and conversion back.

define half @minnum_half(half %a, half %b) {
; CHECK-NOF16-LABEL: minnum_half(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .b16 %rs<4>;
; CHECK-NOF16-NEXT:    .reg .f32 %f<4>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT:  // %bb.0:
; CHECK-NOF16-NEXT:    ld.param.b16 %rs1, [minnum_half_param_0];
; CHECK-NOF16-NEXT:    ld.param.b16 %rs2, [minnum_half_param_1];
; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs1;
; CHECK-NOF16-NEXT:    min.f32 %f3, %f2, %f1;
; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs3, %f3;
; CHECK-NOF16-NEXT:    st.param.b16 [func_retval0], %rs3;
; CHECK-NOF16-NEXT:    ret;
;
; CHECK-F16-LABEL: minnum_half(
; CHECK-F16:       {
; CHECK-F16-NEXT:    .reg .b16 %rs<4>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT:  // %bb.0:
; CHECK-F16-NEXT:    ld.param.b16 %rs1, [minnum_half_param_0];
; CHECK-F16-NEXT:    ld.param.b16 %rs2, [minnum_half_param_1];
; CHECK-F16-NEXT:    min.f16 %rs3, %rs1, %rs2;
; CHECK-F16-NEXT:    st.param.b16 [func_retval0], %rs3;
; CHECK-F16-NEXT:    ret;
;
; CHECK-SM80-NOF16-LABEL: minnum_half(
; CHECK-SM80-NOF16:       {
; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<4>;
; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<4>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT:  // %bb.0:
; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs1, [minnum_half_param_0];
; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs2, [minnum_half_param_1];
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f2, %rs1;
; CHECK-SM80-NOF16-NEXT:    min.f32 %f3, %f2, %f1;
; CHECK-SM80-NOF16-NEXT:    cvt.rn.f16.f32 %rs3, %f3;
; CHECK-SM80-NOF16-NEXT:    st.param.b16 [func_retval0], %rs3;
; CHECK-SM80-NOF16-NEXT:    ret;
  %x = call half @llvm.minnum.f16(half %a, half %b)
  ret half %x
}

define float @minnum_float(float %a, float %b) {
; CHECK-LABEL: minnum_float(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [minnum_float_param_0];
; CHECK-NEXT:    ld.param.f32 %f2, [minnum_float_param_1];
; CHECK-NEXT:    min.f32 %f3, %f1, %f2;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f3;
; CHECK-NEXT:    ret;
  %x = call float @llvm.minnum.f32(float %a, float %b)
  ret float %x
}

define float @minnum_imm1(float %a) {
; CHECK-LABEL: minnum_imm1(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [minnum_imm1_param_0];
; CHECK-NEXT:    min.f32 %f2, %f1, 0f00000000;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
  %x = call float @llvm.minnum.f32(float %a, float 0.0)
  ret float %x
}

define float @minnum_imm2(float %a) {
; CHECK-LABEL: minnum_imm2(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [minnum_imm2_param_0];
; CHECK-NEXT:    min.f32 %f2, %f1, 0f00000000;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-NEXT:    ret;
  %x = call float @llvm.minnum.f32(float 0.0, float %a)
  ret float %x
}

define float @minnum_float_ftz(float %a, float %b) #1 {
; CHECK-LABEL: minnum_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .f32 %f<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [minnum_float_ftz_param_0];
; CHECK-NEXT:    ld.param.f32 %f2, [minnum_float_ftz_param_1];
; CHECK-NEXT:    min.ftz.f32 %f3, %f1, %f2;
; CHECK-NEXT:    st.param.f32 [func_retval0], %f3;
; CHECK-NEXT:    ret;
  %x = call float @llvm.minnum.f32(float %a, float %b)
  ret float %x
}

define double @minnum_double(double %a, double %b) {
; CHECK-LABEL: minnum_double(
; CHECK:       {
; CHECK-NEXT:    .reg .f64 %fd<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f64 %fd1, [minnum_double_param_0];
; CHECK-NEXT:    ld.param.f64 %fd2, [minnum_double_param_1];
; CHECK-NEXT:    min.f64 %fd3, %fd1, %fd2;
; CHECK-NEXT:    st.param.f64 [func_retval0], %fd3;
; CHECK-NEXT:    ret;
  %x = call double @llvm.minnum.f64(double %a, double %b)
  ret double %x
}

define <2 x half> @minnum_v2half(<2 x half> %a, <2 x half> %b) {
; CHECK-NOF16-LABEL: minnum_v2half(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
; CHECK-NOF16-NEXT:    .reg .f32 %f<7>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT:  // %bb.0:
; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minnum_v2half_param_0];
; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [minnum_v2half_param_1];
; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
; CHECK-NOF16-NEXT:    min.f32 %f3, %f2, %f1;
; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs5, %f3;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs1;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %f5, %rs3;
; CHECK-NOF16-NEXT:    min.f32 %f6, %f5, %f4;
; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs6, %f6;
; CHECK-NOF16-NEXT:    mov.b32 %r3, {%rs6, %rs5};
; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-NOF16-NEXT:    ret;
;
; CHECK-F16-LABEL: minnum_v2half(
; CHECK-F16:       {
; CHECK-F16-NEXT:    .reg .b32 %r<4>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT:  // %bb.0:
; CHECK-F16-NEXT:    ld.param.b32 %r1, [minnum_v2half_param_1];
; CHECK-F16-NEXT:    ld.param.b32 %r2, [minnum_v2half_param_0];
; CHECK-F16-NEXT:    min.f16x2 %r3, %r2, %r1;
; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-F16-NEXT:    ret;
;
; CHECK-SM80-NOF16-LABEL: minnum_v2half(
; CHECK-SM80-NOF16:       {
; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<7>;
; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<4>;
; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<7>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT:  // %bb.0:
; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r1, [minnum_v2half_param_0];
; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r2, [minnum_v2half_param_1];
; CHECK-SM80-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
; CHECK-SM80-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
; CHECK-SM80-NOF16-NEXT:    min.f32 %f3, %f2, %f1;
; CHECK-SM80-NOF16-NEXT:    cvt.rn.f16.f32 %rs5, %f3;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f4, %rs1;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f5, %rs3;
; CHECK-SM80-NOF16-NEXT:    min.f32 %f6, %f5, %f4;
; CHECK-SM80-NOF16-NEXT:    cvt.rn.f16.f32 %rs6, %f6;
; CHECK-SM80-NOF16-NEXT:    mov.b32 %r3, {%rs6, %rs5};
; CHECK-SM80-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-SM80-NOF16-NEXT:    ret;
  %x = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
  ret <2 x half> %x
}

; ---- minimum ----
; The default-target run expects an expanded setp/selp sequence that selects a
; NaN constant and special-cases the 0x8000... (negative zero) bit pattern.
; Both sm_80 + ptx70 runs expect min.NaN.f32 for the f32 tests; for half, only
; the run with f16 math enabled expects min.NaN.f16.

define half @minimum_half(half %a, half %b) {
; CHECK-NOF16-LABEL: minimum_half(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .pred %p<6>;
; CHECK-NOF16-NEXT:    .reg .b16 %rs<8>;
; CHECK-NOF16-NEXT:    .reg .f32 %f<4>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT:  // %bb.0:
; CHECK-NOF16-NEXT:    ld.param.b16 %rs1, [minimum_half_param_0];
; CHECK-NOF16-NEXT:    ld.param.b16 %rs2, [minimum_half_param_1];
; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs1;
; CHECK-NOF16-NEXT:    setp.lt.f32 %p1, %f2, %f1;
; CHECK-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %f2, %f1;
; CHECK-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
; CHECK-NOF16-NEXT:    setp.eq.s16 %p3, %rs1, -32768;
; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
; CHECK-NOF16-NEXT:    setp.eq.s16 %p4, %rs2, -32768;
; CHECK-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs4;
; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %f3, 0f00000000;
; CHECK-NOF16-NEXT:    selp.b16 %rs7, %rs6, %rs4, %p5;
; CHECK-NOF16-NEXT:    st.param.b16 [func_retval0], %rs7;
; CHECK-NOF16-NEXT:    ret;
;
; CHECK-F16-LABEL: minimum_half(
; CHECK-F16:       {
; CHECK-F16-NEXT:    .reg .b16 %rs<4>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT:  // %bb.0:
; CHECK-F16-NEXT:    ld.param.b16 %rs1, [minimum_half_param_0];
; CHECK-F16-NEXT:    ld.param.b16 %rs2, [minimum_half_param_1];
; CHECK-F16-NEXT:    min.NaN.f16 %rs3, %rs1, %rs2;
; CHECK-F16-NEXT:    st.param.b16 [func_retval0], %rs3;
; CHECK-F16-NEXT:    ret;
;
; CHECK-SM80-NOF16-LABEL: minimum_half(
; CHECK-SM80-NOF16:       {
; CHECK-SM80-NOF16-NEXT:    .reg .pred %p<6>;
; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<8>;
; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<4>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT:  // %bb.0:
; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs1, [minimum_half_param_0];
; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs2, [minimum_half_param_1];
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f2, %rs1;
; CHECK-SM80-NOF16-NEXT:    setp.lt.f32 %p1, %f2, %f1;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p2, %f2, %f1;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p3, %rs1, -32768;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p4, %rs2, -32768;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f3, %rs4;
; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %f3, 0f00000000;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs7, %rs6, %rs4, %p5;
; CHECK-SM80-NOF16-NEXT:    st.param.b16 [func_retval0], %rs7;
; CHECK-SM80-NOF16-NEXT:    ret;
  %x = call half @llvm.minimum.f16(half %a, half %b)
  ret half %x
}

define float @minimum_float(float %a, float %b) {
; CHECK-NOF16-LABEL: minimum_float(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .pred %p<5>;
; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
; CHECK-NOF16-NEXT:    .reg .f32 %f<8>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT:  // %bb.0:
; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [minimum_float_param_0];
; CHECK-NOF16-NEXT:    mov.b32 %r1, %f1;
; CHECK-NOF16-NEXT:    ld.param.f32 %f2, [minimum_float_param_1];
; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f2;
; CHECK-NOF16-NEXT:    min.f32 %f3, %f1, %f2;
; CHECK-NOF16-NEXT:    selp.f32 %f4, 0f7FC00000, %f3, %p1;
; CHECK-NOF16-NEXT:    setp.eq.s32 %p2, %r1, -2147483648;
; CHECK-NOF16-NEXT:    selp.f32 %f5, %f1, %f4, %p2;
; CHECK-NOF16-NEXT:    mov.b32 %r2, %f2;
; CHECK-NOF16-NEXT:    setp.eq.s32 %p3, %r2, -2147483648;
; CHECK-NOF16-NEXT:    selp.f32 %f6, %f2, %f5, %p3;
; CHECK-NOF16-NEXT:    setp.eq.f32 %p4, %f4, 0f00000000;
; CHECK-NOF16-NEXT:    selp.f32 %f7, %f6, %f4, %p4;
; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f7;
; CHECK-NOF16-NEXT:    ret;
;
; CHECK-F16-LABEL: minimum_float(
; CHECK-F16:       {
; CHECK-F16-NEXT:    .reg .f32 %f<4>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT:  // %bb.0:
; CHECK-F16-NEXT:    ld.param.f32 %f1, [minimum_float_param_0];
; CHECK-F16-NEXT:    ld.param.f32 %f2, [minimum_float_param_1];
; CHECK-F16-NEXT:    min.NaN.f32 %f3, %f1, %f2;
; CHECK-F16-NEXT:    st.param.f32 [func_retval0], %f3;
; CHECK-F16-NEXT:    ret;
;
; CHECK-SM80-NOF16-LABEL: minimum_float(
; CHECK-SM80-NOF16:       {
; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<4>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT:  // %bb.0:
; CHECK-SM80-NOF16-NEXT:    ld.param.f32 %f1, [minimum_float_param_0];
; CHECK-SM80-NOF16-NEXT:    ld.param.f32 %f2, [minimum_float_param_1];
; CHECK-SM80-NOF16-NEXT:    min.NaN.f32 %f3, %f1, %f2;
; CHECK-SM80-NOF16-NEXT:    st.param.f32 [func_retval0], %f3;
; CHECK-SM80-NOF16-NEXT:    ret;
  %x = call float @llvm.minimum.f32(float %a, float %b)
  ret float %x
}

define float @minimum_imm1(float %a) {
; CHECK-NOF16-LABEL: minimum_imm1(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .pred %p<4>;
; CHECK-NOF16-NEXT:    .reg .b32 %r<2>;
; CHECK-NOF16-NEXT:    .reg .f32 %f<6>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT:  // %bb.0:
; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [minimum_imm1_param_0];
; CHECK-NOF16-NEXT:    mov.b32 %r1, %f1;
; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f1;
; CHECK-NOF16-NEXT:    min.f32 %f2, %f1, 0f00000000;
; CHECK-NOF16-NEXT:    selp.f32 %f3, 0f7FC00000, %f2, %p1;
; CHECK-NOF16-NEXT:    setp.eq.s32 %p2, %r1, -2147483648;
; CHECK-NOF16-NEXT:    selp.f32 %f4, %f1, %f3, %p2;
; CHECK-NOF16-NEXT:    setp.eq.f32 %p3, %f3, 0f00000000;
; CHECK-NOF16-NEXT:    selp.f32 %f5, %f4, %f3, %p3;
; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f5;
; CHECK-NOF16-NEXT:    ret;
;
; CHECK-F16-LABEL: minimum_imm1(
; CHECK-F16:       {
; CHECK-F16-NEXT:    .reg .f32 %f<3>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT:  // %bb.0:
; CHECK-F16-NEXT:    ld.param.f32 %f1, [minimum_imm1_param_0];
; CHECK-F16-NEXT:    min.NaN.f32 %f2, %f1, 0f00000000;
; CHECK-F16-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-F16-NEXT:    ret;
;
; CHECK-SM80-NOF16-LABEL: minimum_imm1(
; CHECK-SM80-NOF16:       {
; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<3>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT:  // %bb.0:
; CHECK-SM80-NOF16-NEXT:    ld.param.f32 %f1, [minimum_imm1_param_0];
; CHECK-SM80-NOF16-NEXT:    min.NaN.f32 %f2, %f1, 0f00000000;
; CHECK-SM80-NOF16-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-SM80-NOF16-NEXT:    ret;
  %x = call float @llvm.minimum.f32(float %a, float 0.0)
  ret float %x
}

define float @minimum_imm2(float %a) {
; CHECK-NOF16-LABEL: minimum_imm2(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .pred %p<4>;
; CHECK-NOF16-NEXT:    .reg .b32 %r<2>;
; CHECK-NOF16-NEXT:    .reg .f32 %f<6>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT:  // %bb.0:
; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [minimum_imm2_param_0];
; CHECK-NOF16-NEXT:    mov.b32 %r1, %f1;
; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f1;
; CHECK-NOF16-NEXT:    min.f32 %f2, %f1, 0f00000000;
; CHECK-NOF16-NEXT:    selp.f32 %f3, 0f7FC00000, %f2, %p1;
; CHECK-NOF16-NEXT:    setp.eq.s32 %p2, %r1, -2147483648;
; CHECK-NOF16-NEXT:    selp.f32 %f4, %f1, %f3, %p2;
; CHECK-NOF16-NEXT:    setp.eq.f32 %p3, %f3, 0f00000000;
; CHECK-NOF16-NEXT:    selp.f32 %f5, %f4, %f3, %p3;
; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f5;
; CHECK-NOF16-NEXT:    ret;
;
; CHECK-F16-LABEL: minimum_imm2(
; CHECK-F16:       {
; CHECK-F16-NEXT:    .reg .f32 %f<3>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT:  // %bb.0:
; CHECK-F16-NEXT:    ld.param.f32 %f1, [minimum_imm2_param_0];
; CHECK-F16-NEXT:    min.NaN.f32 %f2, %f1, 0f00000000;
; CHECK-F16-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-F16-NEXT:    ret;
;
; CHECK-SM80-NOF16-LABEL: minimum_imm2(
; CHECK-SM80-NOF16:       {
; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<3>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT:  // %bb.0:
; CHECK-SM80-NOF16-NEXT:    ld.param.f32 %f1, [minimum_imm2_param_0];
; CHECK-SM80-NOF16-NEXT:    min.NaN.f32 %f2, %f1, 0f00000000;
; CHECK-SM80-NOF16-NEXT:    st.param.f32 [func_retval0], %f2;
; CHECK-SM80-NOF16-NEXT:    ret;
  %x = call float @llvm.minimum.f32(float 0.0, float %a)
  ret float %x
}

define float @minimum_float_ftz(float %a, float %b) #1 {
; CHECK-NOF16-LABEL: minimum_float_ftz(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .pred %p<5>;
; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
; CHECK-NOF16-NEXT:    .reg .f32
%f<8>; 826; CHECK-NOF16-EMPTY: 827; CHECK-NOF16-NEXT: // %bb.0: 828; CHECK-NOF16-NEXT: ld.param.f32 %f1, [minimum_float_ftz_param_0]; 829; CHECK-NOF16-NEXT: mov.b32 %r1, %f1; 830; CHECK-NOF16-NEXT: ld.param.f32 %f2, [minimum_float_ftz_param_1]; 831; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p1, %f1, %f2; 832; CHECK-NOF16-NEXT: min.ftz.f32 %f3, %f1, %f2; 833; CHECK-NOF16-NEXT: selp.f32 %f4, 0f7FC00000, %f3, %p1; 834; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, -2147483648; 835; CHECK-NOF16-NEXT: selp.f32 %f5, %f1, %f4, %p2; 836; CHECK-NOF16-NEXT: mov.b32 %r2, %f2; 837; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, -2147483648; 838; CHECK-NOF16-NEXT: selp.f32 %f6, %f2, %f5, %p3; 839; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %f4, 0f00000000; 840; CHECK-NOF16-NEXT: selp.f32 %f7, %f6, %f4, %p4; 841; CHECK-NOF16-NEXT: st.param.f32 [func_retval0], %f7; 842; CHECK-NOF16-NEXT: ret; 843; 844; CHECK-F16-LABEL: minimum_float_ftz( 845; CHECK-F16: { 846; CHECK-F16-NEXT: .reg .f32 %f<4>; 847; CHECK-F16-EMPTY: 848; CHECK-F16-NEXT: // %bb.0: 849; CHECK-F16-NEXT: ld.param.f32 %f1, [minimum_float_ftz_param_0]; 850; CHECK-F16-NEXT: ld.param.f32 %f2, [minimum_float_ftz_param_1]; 851; CHECK-F16-NEXT: min.NaN.ftz.f32 %f3, %f1, %f2; 852; CHECK-F16-NEXT: st.param.f32 [func_retval0], %f3; 853; CHECK-F16-NEXT: ret; 854; 855; CHECK-SM80-NOF16-LABEL: minimum_float_ftz( 856; CHECK-SM80-NOF16: { 857; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<4>; 858; CHECK-SM80-NOF16-EMPTY: 859; CHECK-SM80-NOF16-NEXT: // %bb.0: 860; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [minimum_float_ftz_param_0]; 861; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f2, [minimum_float_ftz_param_1]; 862; CHECK-SM80-NOF16-NEXT: min.NaN.ftz.f32 %f3, %f1, %f2; 863; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0], %f3; 864; CHECK-SM80-NOF16-NEXT: ret; 865 %x = call float @llvm.minimum.f32(float %a, float %b) 866 ret float %x 867} 868 869define double @minimum_double(double %a, double %b) { 870; CHECK-LABEL: minimum_double( 871; CHECK: { 872; CHECK-NEXT: .reg 
.pred %p<5>; 873; CHECK-NEXT: .reg .b64 %rd<3>; 874; CHECK-NEXT: .reg .f64 %fd<8>; 875; CHECK-EMPTY: 876; CHECK-NEXT: // %bb.0: 877; CHECK-NEXT: ld.param.f64 %fd1, [minimum_double_param_0]; 878; CHECK-NEXT: mov.b64 %rd1, %fd1; 879; CHECK-NEXT: ld.param.f64 %fd2, [minimum_double_param_1]; 880; CHECK-NEXT: setp.nan.f64 %p1, %fd1, %fd2; 881; CHECK-NEXT: min.f64 %fd3, %fd1, %fd2; 882; CHECK-NEXT: selp.f64 %fd4, 0d7FF8000000000000, %fd3, %p1; 883; CHECK-NEXT: setp.eq.s64 %p2, %rd1, -9223372036854775808; 884; CHECK-NEXT: selp.f64 %fd5, %fd1, %fd4, %p2; 885; CHECK-NEXT: mov.b64 %rd2, %fd2; 886; CHECK-NEXT: setp.eq.s64 %p3, %rd2, -9223372036854775808; 887; CHECK-NEXT: selp.f64 %fd6, %fd2, %fd5, %p3; 888; CHECK-NEXT: setp.eq.f64 %p4, %fd4, 0d0000000000000000; 889; CHECK-NEXT: selp.f64 %fd7, %fd6, %fd4, %p4; 890; CHECK-NEXT: st.param.f64 [func_retval0], %fd7; 891; CHECK-NEXT: ret; 892 %x = call double @llvm.minimum.f64(double %a, double %b) 893 ret double %x 894} 895 896define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) { 897; CHECK-NOF16-LABEL: minimum_v2half( 898; CHECK-NOF16: { 899; CHECK-NOF16-NEXT: .reg .pred %p<11>; 900; CHECK-NOF16-NEXT: .reg .b16 %rs<15>; 901; CHECK-NOF16-NEXT: .reg .b32 %r<4>; 902; CHECK-NOF16-NEXT: .reg .f32 %f<7>; 903; CHECK-NOF16-EMPTY: 904; CHECK-NOF16-NEXT: // %bb.0: 905; CHECK-NOF16-NEXT: ld.param.b32 %r1, [minimum_v2half_param_0]; 906; CHECK-NOF16-NEXT: ld.param.b32 %r2, [minimum_v2half_param_1]; 907; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 908; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 909; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 910; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 911; CHECK-NOF16-NEXT: setp.lt.f32 %p1, %f2, %f1; 912; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1; 913; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %f2, %f1; 914; CHECK-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; 915; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs4, -32768; 916; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs4, %rs6, %p3; 917; CHECK-NOF16-NEXT: 
setp.eq.s16 %p4, %rs2, -32768; 918; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs2, %rs7, %p4; 919; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs6; 920; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; 921; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs6, %p5; 922; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; 923; CHECK-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; 924; CHECK-NOF16-NEXT: setp.lt.f32 %p6, %f5, %f4; 925; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs3, %rs1, %p6; 926; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %f5, %f4; 927; CHECK-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7; 928; CHECK-NOF16-NEXT: setp.eq.s16 %p8, %rs3, -32768; 929; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs11, %p8; 930; CHECK-NOF16-NEXT: setp.eq.s16 %p9, %rs1, -32768; 931; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs1, %rs12, %p9; 932; CHECK-NOF16-NEXT: cvt.f32.f16 %f6, %rs11; 933; CHECK-NOF16-NEXT: setp.eq.f32 %p10, %f6, 0f00000000; 934; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs13, %rs11, %p10; 935; CHECK-NOF16-NEXT: mov.b32 %r3, {%rs14, %rs9}; 936; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r3; 937; CHECK-NOF16-NEXT: ret; 938; 939; CHECK-F16-LABEL: minimum_v2half( 940; CHECK-F16: { 941; CHECK-F16-NEXT: .reg .b32 %r<4>; 942; CHECK-F16-EMPTY: 943; CHECK-F16-NEXT: // %bb.0: 944; CHECK-F16-NEXT: ld.param.b32 %r1, [minimum_v2half_param_1]; 945; CHECK-F16-NEXT: ld.param.b32 %r2, [minimum_v2half_param_0]; 946; CHECK-F16-NEXT: min.NaN.f16x2 %r3, %r2, %r1; 947; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; 948; CHECK-F16-NEXT: ret; 949; 950; CHECK-SM80-NOF16-LABEL: minimum_v2half( 951; CHECK-SM80-NOF16: { 952; CHECK-SM80-NOF16-NEXT: .reg .pred %p<11>; 953; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<15>; 954; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<4>; 955; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<7>; 956; CHECK-SM80-NOF16-EMPTY: 957; CHECK-SM80-NOF16-NEXT: // %bb.0: 958; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, [minimum_v2half_param_0]; 959; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r2, [minimum_v2half_param_1]; 960; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs1, 
%rs2}, %r2; 961; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 962; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 963; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 964; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p1, %f2, %f1; 965; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1; 966; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %f2, %f1; 967; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; 968; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs4, -32768; 969; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs4, %rs6, %p3; 970; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs2, -32768; 971; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs2, %rs7, %p4; 972; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f3, %rs6; 973; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; 974; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs6, %p5; 975; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; 976; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; 977; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p6, %f5, %f4; 978; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs3, %rs1, %p6; 979; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %f5, %f4; 980; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7; 981; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p8, %rs3, -32768; 982; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs11, %p8; 983; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p9, %rs1, -32768; 984; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs1, %rs12, %p9; 985; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f6, %rs11; 986; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p10, %f6, 0f00000000; 987; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs13, %rs11, %p10; 988; CHECK-SM80-NOF16-NEXT: mov.b32 %r3, {%rs14, %rs9}; 989; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r3; 990; CHECK-SM80-NOF16-NEXT: ret; 991 %x = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b) 992 ret <2 x half> %x 993} 994 995; ---- maxnum ---- 996 997define half @maxnum_half(half %a, half %b) { 998; CHECK-NOF16-LABEL: maxnum_half( 999; CHECK-NOF16: { 1000; CHECK-NOF16-NEXT: .reg .b16 
%rs<4>; 1001; CHECK-NOF16-NEXT: .reg .f32 %f<4>; 1002; CHECK-NOF16-EMPTY: 1003; CHECK-NOF16-NEXT: // %bb.0: 1004; CHECK-NOF16-NEXT: ld.param.b16 %rs1, [maxnum_half_param_0]; 1005; CHECK-NOF16-NEXT: ld.param.b16 %rs2, [maxnum_half_param_1]; 1006; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1007; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs1; 1008; CHECK-NOF16-NEXT: max.f32 %f3, %f2, %f1; 1009; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %f3; 1010; CHECK-NOF16-NEXT: st.param.b16 [func_retval0], %rs3; 1011; CHECK-NOF16-NEXT: ret; 1012; 1013; CHECK-F16-LABEL: maxnum_half( 1014; CHECK-F16: { 1015; CHECK-F16-NEXT: .reg .b16 %rs<4>; 1016; CHECK-F16-EMPTY: 1017; CHECK-F16-NEXT: // %bb.0: 1018; CHECK-F16-NEXT: ld.param.b16 %rs1, [maxnum_half_param_0]; 1019; CHECK-F16-NEXT: ld.param.b16 %rs2, [maxnum_half_param_1]; 1020; CHECK-F16-NEXT: max.f16 %rs3, %rs1, %rs2; 1021; CHECK-F16-NEXT: st.param.b16 [func_retval0], %rs3; 1022; CHECK-F16-NEXT: ret; 1023; 1024; CHECK-SM80-NOF16-LABEL: maxnum_half( 1025; CHECK-SM80-NOF16: { 1026; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<4>; 1027; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<4>; 1028; CHECK-SM80-NOF16-EMPTY: 1029; CHECK-SM80-NOF16-NEXT: // %bb.0: 1030; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs1, [maxnum_half_param_0]; 1031; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs2, [maxnum_half_param_1]; 1032; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1033; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f2, %rs1; 1034; CHECK-SM80-NOF16-NEXT: max.f32 %f3, %f2, %f1; 1035; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %f3; 1036; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0], %rs3; 1037; CHECK-SM80-NOF16-NEXT: ret; 1038 %x = call half @llvm.maxnum.f16(half %a, half %b) 1039 ret half %x 1040} 1041 1042define float @maxnum_imm1(float %a) { 1043; CHECK-LABEL: maxnum_imm1( 1044; CHECK: { 1045; CHECK-NEXT: .reg .f32 %f<3>; 1046; CHECK-EMPTY: 1047; CHECK-NEXT: // %bb.0: 1048; CHECK-NEXT: ld.param.f32 %f1, [maxnum_imm1_param_0]; 1049; CHECK-NEXT: max.f32 %f2, %f1, 0f00000000; 1050; 
CHECK-NEXT: st.param.f32 [func_retval0], %f2; 1051; CHECK-NEXT: ret; 1052 %x = call float @llvm.maxnum.f32(float %a, float 0.0) 1053 ret float %x 1054} 1055 1056define float @maxnum_imm2(float %a) { 1057; CHECK-LABEL: maxnum_imm2( 1058; CHECK: { 1059; CHECK-NEXT: .reg .f32 %f<3>; 1060; CHECK-EMPTY: 1061; CHECK-NEXT: // %bb.0: 1062; CHECK-NEXT: ld.param.f32 %f1, [maxnum_imm2_param_0]; 1063; CHECK-NEXT: max.f32 %f2, %f1, 0f00000000; 1064; CHECK-NEXT: st.param.f32 [func_retval0], %f2; 1065; CHECK-NEXT: ret; 1066 %x = call float @llvm.maxnum.f32(float 0.0, float %a) 1067 ret float %x 1068} 1069 1070define float @maxnum_float(float %a, float %b) { 1071; CHECK-LABEL: maxnum_float( 1072; CHECK: { 1073; CHECK-NEXT: .reg .f32 %f<4>; 1074; CHECK-EMPTY: 1075; CHECK-NEXT: // %bb.0: 1076; CHECK-NEXT: ld.param.f32 %f1, [maxnum_float_param_0]; 1077; CHECK-NEXT: ld.param.f32 %f2, [maxnum_float_param_1]; 1078; CHECK-NEXT: max.f32 %f3, %f1, %f2; 1079; CHECK-NEXT: st.param.f32 [func_retval0], %f3; 1080; CHECK-NEXT: ret; 1081 %x = call float @llvm.maxnum.f32(float %a, float %b) 1082 ret float %x 1083} 1084 1085define float @maxnum_float_ftz(float %a, float %b) #1 { 1086; CHECK-LABEL: maxnum_float_ftz( 1087; CHECK: { 1088; CHECK-NEXT: .reg .f32 %f<4>; 1089; CHECK-EMPTY: 1090; CHECK-NEXT: // %bb.0: 1091; CHECK-NEXT: ld.param.f32 %f1, [maxnum_float_ftz_param_0]; 1092; CHECK-NEXT: ld.param.f32 %f2, [maxnum_float_ftz_param_1]; 1093; CHECK-NEXT: max.ftz.f32 %f3, %f1, %f2; 1094; CHECK-NEXT: st.param.f32 [func_retval0], %f3; 1095; CHECK-NEXT: ret; 1096 %x = call float @llvm.maxnum.f32(float %a, float %b) 1097 ret float %x 1098} 1099 1100define double @maxnum_double(double %a, double %b) { 1101; CHECK-LABEL: maxnum_double( 1102; CHECK: { 1103; CHECK-NEXT: .reg .f64 %fd<4>; 1104; CHECK-EMPTY: 1105; CHECK-NEXT: // %bb.0: 1106; CHECK-NEXT: ld.param.f64 %fd1, [maxnum_double_param_0]; 1107; CHECK-NEXT: ld.param.f64 %fd2, [maxnum_double_param_1]; 1108; CHECK-NEXT: max.f64 %fd3, %fd1, %fd2; 1109; 
CHECK-NEXT: st.param.f64 [func_retval0], %fd3; 1110; CHECK-NEXT: ret; 1111 %x = call double @llvm.maxnum.f64(double %a, double %b) 1112 ret double %x 1113} 1114 1115define <2 x half> @maxnum_v2half(<2 x half> %a, <2 x half> %b) { 1116; CHECK-NOF16-LABEL: maxnum_v2half( 1117; CHECK-NOF16: { 1118; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; 1119; CHECK-NOF16-NEXT: .reg .b32 %r<4>; 1120; CHECK-NOF16-NEXT: .reg .f32 %f<7>; 1121; CHECK-NOF16-EMPTY: 1122; CHECK-NOF16-NEXT: // %bb.0: 1123; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maxnum_v2half_param_0]; 1124; CHECK-NOF16-NEXT: ld.param.b32 %r2, [maxnum_v2half_param_1]; 1125; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 1126; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1127; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 1128; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 1129; CHECK-NOF16-NEXT: max.f32 %f3, %f2, %f1; 1130; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %f3; 1131; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; 1132; CHECK-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; 1133; CHECK-NOF16-NEXT: max.f32 %f6, %f5, %f4; 1134; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %f6; 1135; CHECK-NOF16-NEXT: mov.b32 %r3, {%rs6, %rs5}; 1136; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r3; 1137; CHECK-NOF16-NEXT: ret; 1138; 1139; CHECK-F16-LABEL: maxnum_v2half( 1140; CHECK-F16: { 1141; CHECK-F16-NEXT: .reg .b32 %r<4>; 1142; CHECK-F16-EMPTY: 1143; CHECK-F16-NEXT: // %bb.0: 1144; CHECK-F16-NEXT: ld.param.b32 %r1, [maxnum_v2half_param_1]; 1145; CHECK-F16-NEXT: ld.param.b32 %r2, [maxnum_v2half_param_0]; 1146; CHECK-F16-NEXT: max.f16x2 %r3, %r2, %r1; 1147; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; 1148; CHECK-F16-NEXT: ret; 1149; 1150; CHECK-SM80-NOF16-LABEL: maxnum_v2half( 1151; CHECK-SM80-NOF16: { 1152; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<7>; 1153; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<4>; 1154; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<7>; 1155; CHECK-SM80-NOF16-EMPTY: 1156; CHECK-SM80-NOF16-NEXT: // %bb.0: 1157; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, 
[maxnum_v2half_param_0]; 1158; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r2, [maxnum_v2half_param_1]; 1159; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 1160; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1161; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 1162; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 1163; CHECK-SM80-NOF16-NEXT: max.f32 %f3, %f2, %f1; 1164; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %f3; 1165; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; 1166; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; 1167; CHECK-SM80-NOF16-NEXT: max.f32 %f6, %f5, %f4; 1168; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %f6; 1169; CHECK-SM80-NOF16-NEXT: mov.b32 %r3, {%rs6, %rs5}; 1170; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r3; 1171; CHECK-SM80-NOF16-NEXT: ret; 1172 %x = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b) 1173 ret <2 x half> %x 1174} 1175 1176; ---- maximum ---- 1177 1178define half @maximum_half(half %a, half %b) { 1179; CHECK-NOF16-LABEL: maximum_half( 1180; CHECK-NOF16: { 1181; CHECK-NOF16-NEXT: .reg .pred %p<6>; 1182; CHECK-NOF16-NEXT: .reg .b16 %rs<8>; 1183; CHECK-NOF16-NEXT: .reg .f32 %f<4>; 1184; CHECK-NOF16-EMPTY: 1185; CHECK-NOF16-NEXT: // %bb.0: 1186; CHECK-NOF16-NEXT: ld.param.b16 %rs1, [maximum_half_param_0]; 1187; CHECK-NOF16-NEXT: ld.param.b16 %rs2, [maximum_half_param_1]; 1188; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1189; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs1; 1190; CHECK-NOF16-NEXT: setp.gt.f32 %p1, %f2, %f1; 1191; CHECK-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; 1192; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %f2, %f1; 1193; CHECK-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2; 1194; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs1, 0; 1195; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3; 1196; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0; 1197; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4; 1198; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs4; 1199; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; 1200; 
CHECK-NOF16-NEXT: selp.b16 %rs7, %rs6, %rs4, %p5; 1201; CHECK-NOF16-NEXT: st.param.b16 [func_retval0], %rs7; 1202; CHECK-NOF16-NEXT: ret; 1203; 1204; CHECK-F16-LABEL: maximum_half( 1205; CHECK-F16: { 1206; CHECK-F16-NEXT: .reg .b16 %rs<4>; 1207; CHECK-F16-EMPTY: 1208; CHECK-F16-NEXT: // %bb.0: 1209; CHECK-F16-NEXT: ld.param.b16 %rs1, [maximum_half_param_0]; 1210; CHECK-F16-NEXT: ld.param.b16 %rs2, [maximum_half_param_1]; 1211; CHECK-F16-NEXT: max.NaN.f16 %rs3, %rs1, %rs2; 1212; CHECK-F16-NEXT: st.param.b16 [func_retval0], %rs3; 1213; CHECK-F16-NEXT: ret; 1214; 1215; CHECK-SM80-NOF16-LABEL: maximum_half( 1216; CHECK-SM80-NOF16: { 1217; CHECK-SM80-NOF16-NEXT: .reg .pred %p<6>; 1218; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<8>; 1219; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<4>; 1220; CHECK-SM80-NOF16-EMPTY: 1221; CHECK-SM80-NOF16-NEXT: // %bb.0: 1222; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs1, [maximum_half_param_0]; 1223; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs2, [maximum_half_param_1]; 1224; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1225; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f2, %rs1; 1226; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p1, %f2, %f1; 1227; CHECK-SM80-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; 1228; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %f2, %f1; 1229; CHECK-SM80-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2; 1230; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs1, 0; 1231; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3; 1232; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0; 1233; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4; 1234; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f3, %rs4; 1235; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; 1236; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs6, %rs4, %p5; 1237; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0], %rs7; 1238; CHECK-SM80-NOF16-NEXT: ret; 1239 %x = call half @llvm.maximum.f16(half %a, half %b) 1240 ret half %x 1241} 1242 1243define float @maximum_imm1(float %a) { 1244; CHECK-NOF16-LABEL: 
maximum_imm1( 1245; CHECK-NOF16: { 1246; CHECK-NOF16-NEXT: .reg .pred %p<3>; 1247; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 1248; CHECK-NOF16-EMPTY: 1249; CHECK-NOF16-NEXT: // %bb.0: 1250; CHECK-NOF16-NEXT: ld.param.f32 %f1, [maximum_imm1_param_0]; 1251; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %f1, %f1; 1252; CHECK-NOF16-NEXT: max.f32 %f2, %f1, 0f00000000; 1253; CHECK-NOF16-NEXT: selp.f32 %f3, 0f7FC00000, %f2, %p1; 1254; CHECK-NOF16-NEXT: setp.eq.f32 %p2, %f3, 0f00000000; 1255; CHECK-NOF16-NEXT: selp.f32 %f4, 0f00000000, %f3, %p2; 1256; CHECK-NOF16-NEXT: st.param.f32 [func_retval0], %f4; 1257; CHECK-NOF16-NEXT: ret; 1258; 1259; CHECK-F16-LABEL: maximum_imm1( 1260; CHECK-F16: { 1261; CHECK-F16-NEXT: .reg .f32 %f<3>; 1262; CHECK-F16-EMPTY: 1263; CHECK-F16-NEXT: // %bb.0: 1264; CHECK-F16-NEXT: ld.param.f32 %f1, [maximum_imm1_param_0]; 1265; CHECK-F16-NEXT: max.NaN.f32 %f2, %f1, 0f00000000; 1266; CHECK-F16-NEXT: st.param.f32 [func_retval0], %f2; 1267; CHECK-F16-NEXT: ret; 1268; 1269; CHECK-SM80-NOF16-LABEL: maximum_imm1( 1270; CHECK-SM80-NOF16: { 1271; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<3>; 1272; CHECK-SM80-NOF16-EMPTY: 1273; CHECK-SM80-NOF16-NEXT: // %bb.0: 1274; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [maximum_imm1_param_0]; 1275; CHECK-SM80-NOF16-NEXT: max.NaN.f32 %f2, %f1, 0f00000000; 1276; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0], %f2; 1277; CHECK-SM80-NOF16-NEXT: ret; 1278 %x = call float @llvm.maximum.f32(float %a, float 0.0) 1279 ret float %x 1280} 1281 1282define float @maximum_imm2(float %a) { 1283; CHECK-NOF16-LABEL: maximum_imm2( 1284; CHECK-NOF16: { 1285; CHECK-NOF16-NEXT: .reg .pred %p<3>; 1286; CHECK-NOF16-NEXT: .reg .f32 %f<5>; 1287; CHECK-NOF16-EMPTY: 1288; CHECK-NOF16-NEXT: // %bb.0: 1289; CHECK-NOF16-NEXT: ld.param.f32 %f1, [maximum_imm2_param_0]; 1290; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %f1, %f1; 1291; CHECK-NOF16-NEXT: max.f32 %f2, %f1, 0f00000000; 1292; CHECK-NOF16-NEXT: selp.f32 %f3, 0f7FC00000, %f2, %p1; 1293; CHECK-NOF16-NEXT: setp.eq.f32 
%p2, %f3, 0f00000000; 1294; CHECK-NOF16-NEXT: selp.f32 %f4, 0f00000000, %f3, %p2; 1295; CHECK-NOF16-NEXT: st.param.f32 [func_retval0], %f4; 1296; CHECK-NOF16-NEXT: ret; 1297; 1298; CHECK-F16-LABEL: maximum_imm2( 1299; CHECK-F16: { 1300; CHECK-F16-NEXT: .reg .f32 %f<3>; 1301; CHECK-F16-EMPTY: 1302; CHECK-F16-NEXT: // %bb.0: 1303; CHECK-F16-NEXT: ld.param.f32 %f1, [maximum_imm2_param_0]; 1304; CHECK-F16-NEXT: max.NaN.f32 %f2, %f1, 0f00000000; 1305; CHECK-F16-NEXT: st.param.f32 [func_retval0], %f2; 1306; CHECK-F16-NEXT: ret; 1307; 1308; CHECK-SM80-NOF16-LABEL: maximum_imm2( 1309; CHECK-SM80-NOF16: { 1310; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<3>; 1311; CHECK-SM80-NOF16-EMPTY: 1312; CHECK-SM80-NOF16-NEXT: // %bb.0: 1313; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [maximum_imm2_param_0]; 1314; CHECK-SM80-NOF16-NEXT: max.NaN.f32 %f2, %f1, 0f00000000; 1315; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0], %f2; 1316; CHECK-SM80-NOF16-NEXT: ret; 1317 %x = call float @llvm.maximum.f32(float 0.0, float %a) 1318 ret float %x 1319} 1320 1321define float @maximum_float(float %a, float %b) { 1322; CHECK-NOF16-LABEL: maximum_float( 1323; CHECK-NOF16: { 1324; CHECK-NOF16-NEXT: .reg .pred %p<5>; 1325; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 1326; CHECK-NOF16-NEXT: .reg .f32 %f<8>; 1327; CHECK-NOF16-EMPTY: 1328; CHECK-NOF16-NEXT: // %bb.0: 1329; CHECK-NOF16-NEXT: ld.param.f32 %f1, [maximum_float_param_0]; 1330; CHECK-NOF16-NEXT: mov.b32 %r1, %f1; 1331; CHECK-NOF16-NEXT: ld.param.f32 %f2, [maximum_float_param_1]; 1332; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %f1, %f2; 1333; CHECK-NOF16-NEXT: max.f32 %f3, %f1, %f2; 1334; CHECK-NOF16-NEXT: selp.f32 %f4, 0f7FC00000, %f3, %p1; 1335; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, 0; 1336; CHECK-NOF16-NEXT: selp.f32 %f5, %f1, %f4, %p2; 1337; CHECK-NOF16-NEXT: mov.b32 %r2, %f2; 1338; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, 0; 1339; CHECK-NOF16-NEXT: selp.f32 %f6, %f2, %f5, %p3; 1340; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %f4, 0f00000000; 1341; CHECK-NOF16-NEXT: 
selp.f32 %f7, %f6, %f4, %p4; 1342; CHECK-NOF16-NEXT: st.param.f32 [func_retval0], %f7; 1343; CHECK-NOF16-NEXT: ret; 1344; 1345; CHECK-F16-LABEL: maximum_float( 1346; CHECK-F16: { 1347; CHECK-F16-NEXT: .reg .f32 %f<4>; 1348; CHECK-F16-EMPTY: 1349; CHECK-F16-NEXT: // %bb.0: 1350; CHECK-F16-NEXT: ld.param.f32 %f1, [maximum_float_param_0]; 1351; CHECK-F16-NEXT: ld.param.f32 %f2, [maximum_float_param_1]; 1352; CHECK-F16-NEXT: max.NaN.f32 %f3, %f1, %f2; 1353; CHECK-F16-NEXT: st.param.f32 [func_retval0], %f3; 1354; CHECK-F16-NEXT: ret; 1355; 1356; CHECK-SM80-NOF16-LABEL: maximum_float( 1357; CHECK-SM80-NOF16: { 1358; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<4>; 1359; CHECK-SM80-NOF16-EMPTY: 1360; CHECK-SM80-NOF16-NEXT: // %bb.0: 1361; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [maximum_float_param_0]; 1362; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f2, [maximum_float_param_1]; 1363; CHECK-SM80-NOF16-NEXT: max.NaN.f32 %f3, %f1, %f2; 1364; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0], %f3; 1365; CHECK-SM80-NOF16-NEXT: ret; 1366 %x = call float @llvm.maximum.f32(float %a, float %b) 1367 ret float %x 1368} 1369 1370define float @maximum_float_ftz(float %a, float %b) #1 { 1371; CHECK-NOF16-LABEL: maximum_float_ftz( 1372; CHECK-NOF16: { 1373; CHECK-NOF16-NEXT: .reg .pred %p<5>; 1374; CHECK-NOF16-NEXT: .reg .b32 %r<3>; 1375; CHECK-NOF16-NEXT: .reg .f32 %f<8>; 1376; CHECK-NOF16-EMPTY: 1377; CHECK-NOF16-NEXT: // %bb.0: 1378; CHECK-NOF16-NEXT: ld.param.f32 %f1, [maximum_float_ftz_param_0]; 1379; CHECK-NOF16-NEXT: mov.b32 %r1, %f1; 1380; CHECK-NOF16-NEXT: ld.param.f32 %f2, [maximum_float_ftz_param_1]; 1381; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p1, %f1, %f2; 1382; CHECK-NOF16-NEXT: max.ftz.f32 %f3, %f1, %f2; 1383; CHECK-NOF16-NEXT: selp.f32 %f4, 0f7FC00000, %f3, %p1; 1384; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, 0; 1385; CHECK-NOF16-NEXT: selp.f32 %f5, %f1, %f4, %p2; 1386; CHECK-NOF16-NEXT: mov.b32 %r2, %f2; 1387; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, 0; 1388; CHECK-NOF16-NEXT: selp.f32 
%f6, %f2, %f5, %p3; 1389; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %f4, 0f00000000; 1390; CHECK-NOF16-NEXT: selp.f32 %f7, %f6, %f4, %p4; 1391; CHECK-NOF16-NEXT: st.param.f32 [func_retval0], %f7; 1392; CHECK-NOF16-NEXT: ret; 1393; 1394; CHECK-F16-LABEL: maximum_float_ftz( 1395; CHECK-F16: { 1396; CHECK-F16-NEXT: .reg .f32 %f<4>; 1397; CHECK-F16-EMPTY: 1398; CHECK-F16-NEXT: // %bb.0: 1399; CHECK-F16-NEXT: ld.param.f32 %f1, [maximum_float_ftz_param_0]; 1400; CHECK-F16-NEXT: ld.param.f32 %f2, [maximum_float_ftz_param_1]; 1401; CHECK-F16-NEXT: max.NaN.ftz.f32 %f3, %f1, %f2; 1402; CHECK-F16-NEXT: st.param.f32 [func_retval0], %f3; 1403; CHECK-F16-NEXT: ret; 1404; 1405; CHECK-SM80-NOF16-LABEL: maximum_float_ftz( 1406; CHECK-SM80-NOF16: { 1407; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<4>; 1408; CHECK-SM80-NOF16-EMPTY: 1409; CHECK-SM80-NOF16-NEXT: // %bb.0: 1410; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [maximum_float_ftz_param_0]; 1411; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f2, [maximum_float_ftz_param_1]; 1412; CHECK-SM80-NOF16-NEXT: max.NaN.ftz.f32 %f3, %f1, %f2; 1413; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0], %f3; 1414; CHECK-SM80-NOF16-NEXT: ret; 1415 %x = call float @llvm.maximum.f32(float %a, float %b) 1416 ret float %x 1417} 1418 1419define double @maximum_double(double %a, double %b) { 1420; CHECK-LABEL: maximum_double( 1421; CHECK: { 1422; CHECK-NEXT: .reg .pred %p<5>; 1423; CHECK-NEXT: .reg .b64 %rd<3>; 1424; CHECK-NEXT: .reg .f64 %fd<8>; 1425; CHECK-EMPTY: 1426; CHECK-NEXT: // %bb.0: 1427; CHECK-NEXT: ld.param.f64 %fd1, [maximum_double_param_0]; 1428; CHECK-NEXT: mov.b64 %rd1, %fd1; 1429; CHECK-NEXT: ld.param.f64 %fd2, [maximum_double_param_1]; 1430; CHECK-NEXT: setp.nan.f64 %p1, %fd1, %fd2; 1431; CHECK-NEXT: max.f64 %fd3, %fd1, %fd2; 1432; CHECK-NEXT: selp.f64 %fd4, 0d7FF8000000000000, %fd3, %p1; 1433; CHECK-NEXT: setp.eq.s64 %p2, %rd1, 0; 1434; CHECK-NEXT: selp.f64 %fd5, %fd1, %fd4, %p2; 1435; CHECK-NEXT: mov.b64 %rd2, %fd2; 1436; CHECK-NEXT: setp.eq.s64 %p3, 
%rd2, 0; 1437; CHECK-NEXT: selp.f64 %fd6, %fd2, %fd5, %p3; 1438; CHECK-NEXT: setp.eq.f64 %p4, %fd4, 0d0000000000000000; 1439; CHECK-NEXT: selp.f64 %fd7, %fd6, %fd4, %p4; 1440; CHECK-NEXT: st.param.f64 [func_retval0], %fd7; 1441; CHECK-NEXT: ret; 1442 %x = call double @llvm.maximum.f64(double %a, double %b) 1443 ret double %x 1444} 1445 1446define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) { 1447; CHECK-NOF16-LABEL: maximum_v2half( 1448; CHECK-NOF16: { 1449; CHECK-NOF16-NEXT: .reg .pred %p<11>; 1450; CHECK-NOF16-NEXT: .reg .b16 %rs<15>; 1451; CHECK-NOF16-NEXT: .reg .b32 %r<4>; 1452; CHECK-NOF16-NEXT: .reg .f32 %f<7>; 1453; CHECK-NOF16-EMPTY: 1454; CHECK-NOF16-NEXT: // %bb.0: 1455; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_v2half_param_0]; 1456; CHECK-NOF16-NEXT: ld.param.b32 %r2, [maximum_v2half_param_1]; 1457; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 1458; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1459; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 1460; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 1461; CHECK-NOF16-NEXT: setp.gt.f32 %p1, %f2, %f1; 1462; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1; 1463; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %f2, %f1; 1464; CHECK-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; 1465; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs4, 0; 1466; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs4, %rs6, %p3; 1467; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0; 1468; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs2, %rs7, %p4; 1469; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs6; 1470; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; 1471; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs6, %p5; 1472; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; 1473; CHECK-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; 1474; CHECK-NOF16-NEXT: setp.gt.f32 %p6, %f5, %f4; 1475; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs3, %rs1, %p6; 1476; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %f5, %f4; 1477; CHECK-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7; 1478; CHECK-NOF16-NEXT: setp.eq.s16 %p8, %rs3, 0; 1479; 
CHECK-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs11, %p8; 1480; CHECK-NOF16-NEXT: setp.eq.s16 %p9, %rs1, 0; 1481; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs1, %rs12, %p9; 1482; CHECK-NOF16-NEXT: cvt.f32.f16 %f6, %rs11; 1483; CHECK-NOF16-NEXT: setp.eq.f32 %p10, %f6, 0f00000000; 1484; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs13, %rs11, %p10; 1485; CHECK-NOF16-NEXT: mov.b32 %r3, {%rs14, %rs9}; 1486; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r3; 1487; CHECK-NOF16-NEXT: ret; 1488; 1489; CHECK-F16-LABEL: maximum_v2half( 1490; CHECK-F16: { 1491; CHECK-F16-NEXT: .reg .b32 %r<4>; 1492; CHECK-F16-EMPTY: 1493; CHECK-F16-NEXT: // %bb.0: 1494; CHECK-F16-NEXT: ld.param.b32 %r1, [maximum_v2half_param_1]; 1495; CHECK-F16-NEXT: ld.param.b32 %r2, [maximum_v2half_param_0]; 1496; CHECK-F16-NEXT: max.NaN.f16x2 %r3, %r2, %r1; 1497; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; 1498; CHECK-F16-NEXT: ret; 1499; 1500; CHECK-SM80-NOF16-LABEL: maximum_v2half( 1501; CHECK-SM80-NOF16: { 1502; CHECK-SM80-NOF16-NEXT: .reg .pred %p<11>; 1503; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<15>; 1504; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<4>; 1505; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<7>; 1506; CHECK-SM80-NOF16-EMPTY: 1507; CHECK-SM80-NOF16-NEXT: // %bb.0: 1508; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, [maximum_v2half_param_0]; 1509; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r2, [maximum_v2half_param_1]; 1510; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; 1511; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; 1512; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; 1513; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; 1514; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p1, %f2, %f1; 1515; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1; 1516; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %f2, %f1; 1517; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; 1518; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs4, 0; 1519; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs4, %rs6, %p3; 1520; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0; 
1521; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs2, %rs7, %p4; 1522; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f3, %rs6; 1523; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; 1524; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs6, %p5; 1525; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; 1526; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; 1527; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p6, %f5, %f4; 1528; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs3, %rs1, %p6; 1529; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %f5, %f4; 1530; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7; 1531; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p8, %rs3, 0; 1532; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs11, %p8; 1533; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p9, %rs1, 0; 1534; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs1, %rs12, %p9; 1535; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f6, %rs11; 1536; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p10, %f6, 0f00000000; 1537; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs13, %rs11, %p10; 1538; CHECK-SM80-NOF16-NEXT: mov.b32 %r3, {%rs14, %rs9}; 1539; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r3; 1540; CHECK-SM80-NOF16-NEXT: ret; 1541 %x = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b) 1542 ret <2 x half> %x 1543} 1544 1545; ---- fma ---- 1546 1547define float @fma_float(float %a, float %b, float %c) { 1548; CHECK-LABEL: fma_float( 1549; CHECK: { 1550; CHECK-NEXT: .reg .f32 %f<5>; 1551; CHECK-EMPTY: 1552; CHECK-NEXT: // %bb.0: 1553; CHECK-NEXT: ld.param.f32 %f1, [fma_float_param_0]; 1554; CHECK-NEXT: ld.param.f32 %f2, [fma_float_param_1]; 1555; CHECK-NEXT: ld.param.f32 %f3, [fma_float_param_2]; 1556; CHECK-NEXT: fma.rn.f32 %f4, %f1, %f2, %f3; 1557; CHECK-NEXT: st.param.f32 [func_retval0], %f4; 1558; CHECK-NEXT: ret; 1559 %x = call float @llvm.fma.f32(float %a, float %b, float %c) 1560 ret float %x 1561} 1562 1563define float @fma_float_ftz(float %a, float %b, float %c) #1 { 1564; CHECK-LABEL: fma_float_ftz( 1565; CHECK: { 1566; CHECK-NEXT: 
.reg .f32 %f<5>; 1567; CHECK-EMPTY: 1568; CHECK-NEXT: // %bb.0: 1569; CHECK-NEXT: ld.param.f32 %f1, [fma_float_ftz_param_0]; 1570; CHECK-NEXT: ld.param.f32 %f2, [fma_float_ftz_param_1]; 1571; CHECK-NEXT: ld.param.f32 %f3, [fma_float_ftz_param_2]; 1572; CHECK-NEXT: fma.rn.ftz.f32 %f4, %f1, %f2, %f3; 1573; CHECK-NEXT: st.param.f32 [func_retval0], %f4; 1574; CHECK-NEXT: ret; 1575 %x = call float @llvm.fma.f32(float %a, float %b, float %c) 1576 ret float %x 1577} 1578 1579define double @fma_double(double %a, double %b, double %c) { 1580; CHECK-LABEL: fma_double( 1581; CHECK: { 1582; CHECK-NEXT: .reg .f64 %fd<5>; 1583; CHECK-EMPTY: 1584; CHECK-NEXT: // %bb.0: 1585; CHECK-NEXT: ld.param.f64 %fd1, [fma_double_param_0]; 1586; CHECK-NEXT: ld.param.f64 %fd2, [fma_double_param_1]; 1587; CHECK-NEXT: ld.param.f64 %fd3, [fma_double_param_2]; 1588; CHECK-NEXT: fma.rn.f64 %fd4, %fd1, %fd2, %fd3; 1589; CHECK-NEXT: st.param.f64 [func_retval0], %fd4; 1590; CHECK-NEXT: ret; 1591 %x = call double @llvm.fma.f64(double %a, double %b, double %c) 1592 ret double %x 1593} 1594 1595attributes #0 = { nounwind readnone } 1596attributes #1 = { "denormal-fp-math-f32" = "preserve-sign" } 1597