xref: /llvm-project/llvm/test/CodeGen/NVPTX/fma-relu-instruction-flag.ll (revision 5e5fd0e6fc50cc1198750308c11433a5b3acfd0f)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}

; Using FTZ should emit fma.ftz.relu for f16, but not for bf16
; RUN: llc < %s -denormal-fp-math-f32=preserve-sign -mtriple=nvptx64 -mcpu=sm_80 | FileCheck %s --check-prefixes=CHECK-FTZ
; RUN: %if ptxas %{ llc < %s -denormal-fp-math-f32=preserve-sign -mtriple=nvptx64 -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}

; SM < 80 (fma.relu needs sm_80 and PTX version >= 70) should not emit fma{.ftz}.relu
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 | FileCheck %s --check-prefixes=CHECK-SM70

; Scalar f16: a single-use `fmul fast` + `fadd fast` whose result is clamped at
; zero through `fcmp ogt` / `select`. The sm_80 runs expect one fused
; fma.rn.relu.f16 (fma.rn.ftz.relu.f16 under FTZ); the sm_70 run expects a
; plain fma.rn.f16 followed by a setp.gt / selp clamp.
define half @fma_f16_expanded_no_nans(half %a, half %b, half %c) {
; CHECK-LABEL: fma_f16_expanded_no_nans(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_no_nans_param_0];
; CHECK-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_no_nans_param_1];
; CHECK-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_no_nans_param_2];
; CHECK-NEXT:    fma.rn.relu.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs4;
; CHECK-NEXT:    ret;
;
; CHECK-FTZ-LABEL: fma_f16_expanded_no_nans(
; CHECK-FTZ:       {
; CHECK-FTZ-NEXT:    .reg .b16 %rs<5>;
; CHECK-FTZ-EMPTY:
; CHECK-FTZ-NEXT:  // %bb.0:
; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_no_nans_param_0];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_no_nans_param_1];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_no_nans_param_2];
; CHECK-FTZ-NEXT:    fma.rn.ftz.relu.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs4;
; CHECK-FTZ-NEXT:    ret;
;
; CHECK-SM70-LABEL: fma_f16_expanded_no_nans(
; CHECK-SM70:       {
; CHECK-SM70-NEXT:    .reg .pred %p<2>;
; CHECK-SM70-NEXT:    .reg .b16 %rs<7>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT:  // %bb.0:
; CHECK-SM70-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_no_nans_param_0];
; CHECK-SM70-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_no_nans_param_1];
; CHECK-SM70-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_no_nans_param_2];
; CHECK-SM70-NEXT:    fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-SM70-NEXT:    mov.b16 %rs5, 0x0000;
; CHECK-SM70-NEXT:    setp.gt.f16 %p1, %rs4, %rs5;
; CHECK-SM70-NEXT:    selp.b16 %rs6, %rs4, 0x0000, %p1;
; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs6;
; CHECK-SM70-NEXT:    ret;
  %1 = fmul fast half %a, %b
  %2 = fadd fast half %1, %c
  %3 = fcmp nsz ogt half %2, 0.0
  %4 = select nsz i1 %3, half %2, half 0.0
  ret half %4
}

; FMA relu shouldn't be selected if the FMA operation has multiple uses.
; Here %2 feeds both the compare/select clamp and the `fadd ..., 7.0`, so every
; run expects a separate fma plus max (setp/selp on sm_70) instead of a
; relu-fused fma.
define half @fma_f16_expanded_no_nans_multiple_uses_of_fma(half %a, half %b, half %c)  {
; CHECK-LABEL: fma_f16_expanded_no_nans_multiple_uses_of_fma(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<10>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_0];
; CHECK-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_1];
; CHECK-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_2];
; CHECK-NEXT:    fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-NEXT:    mov.b16 %rs5, 0x0000;
; CHECK-NEXT:    max.f16 %rs6, %rs4, %rs5;
; CHECK-NEXT:    mov.b16 %rs7, 0x4700;
; CHECK-NEXT:    add.rn.f16 %rs8, %rs4, %rs7;
; CHECK-NEXT:    add.rn.f16 %rs9, %rs6, %rs8;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs9;
; CHECK-NEXT:    ret;
;
; CHECK-FTZ-LABEL: fma_f16_expanded_no_nans_multiple_uses_of_fma(
; CHECK-FTZ:       {
; CHECK-FTZ-NEXT:    .reg .b16 %rs<10>;
; CHECK-FTZ-EMPTY:
; CHECK-FTZ-NEXT:  // %bb.0:
; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_0];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_1];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_2];
; CHECK-FTZ-NEXT:    fma.rn.ftz.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-FTZ-NEXT:    mov.b16 %rs5, 0x0000;
; CHECK-FTZ-NEXT:    max.ftz.f16 %rs6, %rs4, %rs5;
; CHECK-FTZ-NEXT:    mov.b16 %rs7, 0x4700;
; CHECK-FTZ-NEXT:    add.rn.ftz.f16 %rs8, %rs4, %rs7;
; CHECK-FTZ-NEXT:    add.rn.ftz.f16 %rs9, %rs6, %rs8;
; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs9;
; CHECK-FTZ-NEXT:    ret;
;
; CHECK-SM70-LABEL: fma_f16_expanded_no_nans_multiple_uses_of_fma(
; CHECK-SM70:       {
; CHECK-SM70-NEXT:    .reg .pred %p<2>;
; CHECK-SM70-NEXT:    .reg .b16 %rs<10>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT:  // %bb.0:
; CHECK-SM70-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_0];
; CHECK-SM70-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_1];
; CHECK-SM70-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_2];
; CHECK-SM70-NEXT:    fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-SM70-NEXT:    mov.b16 %rs5, 0x0000;
; CHECK-SM70-NEXT:    setp.gt.f16 %p1, %rs4, %rs5;
; CHECK-SM70-NEXT:    selp.b16 %rs6, %rs4, 0x0000, %p1;
; CHECK-SM70-NEXT:    mov.b16 %rs7, 0x4700;
; CHECK-SM70-NEXT:    add.rn.f16 %rs8, %rs4, %rs7;
; CHECK-SM70-NEXT:    add.rn.f16 %rs9, %rs6, %rs8;
; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs9;
; CHECK-SM70-NEXT:    ret;
  %1 = fmul fast half %a, %b
  %2 = fadd fast half %1, %c
  %3 = fcmp nsz ogt half %2, 0.0
  %4 = select nsz i1 %3, half %2, half 0.0
  %5 = fadd half %2, 7.0
  %6 = fadd half %4, %5
  ret half %6
}

; Scalar f16 with the clamp written as `llvm.maxnum.f16(x, 0.0)`. The sm_80
; runs expect fma.rn.relu.f16 (fma.rn.ftz.relu.f16 under FTZ); the sm_70 run
; expects fma.rn.f16 with the max carried out in f32 (cvt, max.f32, cvt back).
define half @fma_f16_expanded_maxnum_no_nans(half %a, half %b, half %c)  {
; CHECK-LABEL: fma_f16_expanded_maxnum_no_nans(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_maxnum_no_nans_param_0];
; CHECK-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_maxnum_no_nans_param_1];
; CHECK-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_maxnum_no_nans_param_2];
; CHECK-NEXT:    fma.rn.relu.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs4;
; CHECK-NEXT:    ret;
;
; CHECK-FTZ-LABEL: fma_f16_expanded_maxnum_no_nans(
; CHECK-FTZ:       {
; CHECK-FTZ-NEXT:    .reg .b16 %rs<5>;
; CHECK-FTZ-EMPTY:
; CHECK-FTZ-NEXT:  // %bb.0:
; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_maxnum_no_nans_param_0];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_maxnum_no_nans_param_1];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_maxnum_no_nans_param_2];
; CHECK-FTZ-NEXT:    fma.rn.ftz.relu.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs4;
; CHECK-FTZ-NEXT:    ret;
;
; CHECK-SM70-LABEL: fma_f16_expanded_maxnum_no_nans(
; CHECK-SM70:       {
; CHECK-SM70-NEXT:    .reg .b16 %rs<6>;
; CHECK-SM70-NEXT:    .reg .f32 %f<3>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT:  // %bb.0:
; CHECK-SM70-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_maxnum_no_nans_param_0];
; CHECK-SM70-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_maxnum_no_nans_param_1];
; CHECK-SM70-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_maxnum_no_nans_param_2];
; CHECK-SM70-NEXT:    fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-SM70-NEXT:    cvt.f32.f16 %f1, %rs4;
; CHECK-SM70-NEXT:    max.f32 %f2, %f1, 0f00000000;
; CHECK-SM70-NEXT:    cvt.rn.f16.f32 %rs5, %f2;
; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs5;
; CHECK-SM70-NEXT:    ret;
  %1 = fmul fast half %a, %b
  %2 = fadd fast half %1, %c
  %3 = call nsz half @llvm.maxnum.f16(half %2, half 0.0)
  ret half %3
}

; bf16 variant of the compare/select clamp. Both sm_80 runs expect
; fma.rn.relu.bf16 (the FTZ run expects no .ftz form for bf16). The sm_70 run
; expects the FMA to be done in f32, narrowed back to bf16 with an integer
; bfe/add/selp sequence, and clamped with setp.gt.f32 / selp.b16.
define bfloat @fma_bf16_expanded_no_nans(bfloat %a, bfloat %b, bfloat %c)  {
; CHECK-LABEL: fma_bf16_expanded_no_nans(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b16 %rs1, [fma_bf16_expanded_no_nans_param_0];
; CHECK-NEXT:    ld.param.b16 %rs2, [fma_bf16_expanded_no_nans_param_1];
; CHECK-NEXT:    ld.param.b16 %rs3, [fma_bf16_expanded_no_nans_param_2];
; CHECK-NEXT:    fma.rn.relu.bf16 %rs4, %rs1, %rs2, %rs3;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs4;
; CHECK-NEXT:    ret;
;
; CHECK-FTZ-LABEL: fma_bf16_expanded_no_nans(
; CHECK-FTZ:       {
; CHECK-FTZ-NEXT:    .reg .b16 %rs<5>;
; CHECK-FTZ-EMPTY:
; CHECK-FTZ-NEXT:  // %bb.0:
; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_bf16_expanded_no_nans_param_0];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_bf16_expanded_no_nans_param_1];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_bf16_expanded_no_nans_param_2];
; CHECK-FTZ-NEXT:    fma.rn.relu.bf16 %rs4, %rs1, %rs2, %rs3;
; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs4;
; CHECK-FTZ-NEXT:    ret;
;
; CHECK-SM70-LABEL: fma_bf16_expanded_no_nans(
; CHECK-SM70:       {
; CHECK-SM70-NEXT:    .reg .pred %p<3>;
; CHECK-SM70-NEXT:    .reg .b16 %rs<3>;
; CHECK-SM70-NEXT:    .reg .b32 %r<14>;
; CHECK-SM70-NEXT:    .reg .f32 %f<6>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT:  // %bb.0:
; CHECK-SM70-NEXT:    ld.param.u16 %r1, [fma_bf16_expanded_no_nans_param_2];
; CHECK-SM70-NEXT:    shl.b32 %r2, %r1, 16;
; CHECK-SM70-NEXT:    mov.b32 %f1, %r2;
; CHECK-SM70-NEXT:    ld.param.u16 %r3, [fma_bf16_expanded_no_nans_param_1];
; CHECK-SM70-NEXT:    shl.b32 %r4, %r3, 16;
; CHECK-SM70-NEXT:    mov.b32 %f2, %r4;
; CHECK-SM70-NEXT:    ld.param.u16 %r5, [fma_bf16_expanded_no_nans_param_0];
; CHECK-SM70-NEXT:    shl.b32 %r6, %r5, 16;
; CHECK-SM70-NEXT:    mov.b32 %f3, %r6;
; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
; CHECK-SM70-NEXT:    mov.b32 %r7, %f4;
; CHECK-SM70-NEXT:    bfe.u32 %r8, %r7, 16, 1;
; CHECK-SM70-NEXT:    add.s32 %r9, %r8, %r7;
; CHECK-SM70-NEXT:    add.s32 %r10, %r9, 32767;
; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
; CHECK-SM70-NEXT:    or.b32 %r11, %r7, 4194304;
; CHECK-SM70-NEXT:    selp.b32 %r12, %r11, %r10, %p1;
; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r12; }
; CHECK-SM70-NEXT:    and.b32 %r13, %r12, -65536;
; CHECK-SM70-NEXT:    mov.b32 %f5, %r13;
; CHECK-SM70-NEXT:    setp.gt.f32 %p2, %f5, 0f00000000;
; CHECK-SM70-NEXT:    selp.b16 %rs2, %rs1, 0x0000, %p2;
; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs2;
; CHECK-SM70-NEXT:    ret;
  %1 = fmul fast bfloat %a, %b
  %2 = fadd fast bfloat %1, %c
  %3 = fcmp nsz ogt bfloat %2, 0.0
  %4 = select nsz i1 %3, bfloat %2, bfloat 0.0
  ret bfloat %4
}

; FMA relu shouldn't be selected if the FMA operation has multiple uses.
; %2 feeds both the clamp and the `fadd ..., 7.0`, so the sm_80 runs expect a
; plain fma.rn.bf16 followed by max.bf16. The two trailing fadds are expected
; as fma.rn.bf16 with a 0x3F80 (bf16 1.0) multiplier in the default run, and as
; add.rn.ftz.f32 with bf16<->f32 conversions in the FTZ run. The sm_70 run
; expects all arithmetic in f32.
define bfloat @fma_bf16_expanded_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloat %c)  {
; CHECK-LABEL: fma_bf16_expanded_no_nans_multiple_uses_of_fma(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<11>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b16 %rs1, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_0];
; CHECK-NEXT:    ld.param.b16 %rs2, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_1];
; CHECK-NEXT:    ld.param.b16 %rs3, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_2];
; CHECK-NEXT:    fma.rn.bf16 %rs4, %rs1, %rs2, %rs3;
; CHECK-NEXT:    mov.b16 %rs5, 0x0000;
; CHECK-NEXT:    max.bf16 %rs6, %rs4, %rs5;
; CHECK-NEXT:    mov.b16 %rs7, 0x3F80;
; CHECK-NEXT:    mov.b16 %rs8, 0x40E0;
; CHECK-NEXT:    fma.rn.bf16 %rs9, %rs4, %rs7, %rs8;
; CHECK-NEXT:    fma.rn.bf16 %rs10, %rs6, %rs7, %rs9;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs10;
; CHECK-NEXT:    ret;
;
; CHECK-FTZ-LABEL: fma_bf16_expanded_no_nans_multiple_uses_of_fma(
; CHECK-FTZ:       {
; CHECK-FTZ-NEXT:    .reg .b16 %rs<9>;
; CHECK-FTZ-NEXT:    .reg .b32 %r<7>;
; CHECK-FTZ-NEXT:    .reg .f32 %f<6>;
; CHECK-FTZ-EMPTY:
; CHECK-FTZ-NEXT:  // %bb.0:
; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_0];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_1];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_2];
; CHECK-FTZ-NEXT:    fma.rn.bf16 %rs4, %rs1, %rs2, %rs3;
; CHECK-FTZ-NEXT:    mov.b16 %rs5, 0x0000;
; CHECK-FTZ-NEXT:    max.bf16 %rs6, %rs4, %rs5;
; CHECK-FTZ-NEXT:    cvt.u32.u16 %r1, %rs4;
; CHECK-FTZ-NEXT:    shl.b32 %r2, %r1, 16;
; CHECK-FTZ-NEXT:    mov.b32 %f1, %r2;
; CHECK-FTZ-NEXT:    add.rn.ftz.f32 %f2, %f1, 0f40E00000;
; CHECK-FTZ-NEXT:    cvt.rn.bf16.f32 %rs7, %f2;
; CHECK-FTZ-NEXT:    cvt.u32.u16 %r3, %rs6;
; CHECK-FTZ-NEXT:    shl.b32 %r4, %r3, 16;
; CHECK-FTZ-NEXT:    mov.b32 %f3, %r4;
; CHECK-FTZ-NEXT:    cvt.u32.u16 %r5, %rs7;
; CHECK-FTZ-NEXT:    shl.b32 %r6, %r5, 16;
; CHECK-FTZ-NEXT:    mov.b32 %f4, %r6;
; CHECK-FTZ-NEXT:    add.rn.ftz.f32 %f5, %f3, %f4;
; CHECK-FTZ-NEXT:    cvt.rn.bf16.f32 %rs8, %f5;
; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs8;
; CHECK-FTZ-NEXT:    ret;
;
; CHECK-SM70-LABEL: fma_bf16_expanded_no_nans_multiple_uses_of_fma(
; CHECK-SM70:       {
; CHECK-SM70-NEXT:    .reg .pred %p<5>;
; CHECK-SM70-NEXT:    .reg .b16 %rs<4>;
; CHECK-SM70-NEXT:    .reg .b32 %r<29>;
; CHECK-SM70-NEXT:    .reg .f32 %f<10>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT:  // %bb.0:
; CHECK-SM70-NEXT:    ld.param.u16 %r1, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_2];
; CHECK-SM70-NEXT:    shl.b32 %r2, %r1, 16;
; CHECK-SM70-NEXT:    mov.b32 %f1, %r2;
; CHECK-SM70-NEXT:    ld.param.u16 %r3, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_1];
; CHECK-SM70-NEXT:    shl.b32 %r4, %r3, 16;
; CHECK-SM70-NEXT:    mov.b32 %f2, %r4;
; CHECK-SM70-NEXT:    ld.param.u16 %r5, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_0];
; CHECK-SM70-NEXT:    shl.b32 %r6, %r5, 16;
; CHECK-SM70-NEXT:    mov.b32 %f3, %r6;
; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
; CHECK-SM70-NEXT:    mov.b32 %r7, %f4;
; CHECK-SM70-NEXT:    bfe.u32 %r8, %r7, 16, 1;
; CHECK-SM70-NEXT:    add.s32 %r9, %r8, %r7;
; CHECK-SM70-NEXT:    add.s32 %r10, %r9, 32767;
; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
; CHECK-SM70-NEXT:    or.b32 %r11, %r7, 4194304;
; CHECK-SM70-NEXT:    selp.b32 %r12, %r11, %r10, %p1;
; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r12; }
; CHECK-SM70-NEXT:    and.b32 %r13, %r12, -65536;
; CHECK-SM70-NEXT:    mov.b32 %f5, %r13;
; CHECK-SM70-NEXT:    setp.gt.f32 %p2, %f5, 0f00000000;
; CHECK-SM70-NEXT:    selp.b16 %rs2, %rs1, 0x0000, %p2;
; CHECK-SM70-NEXT:    add.rn.f32 %f6, %f5, 0f40E00000;
; CHECK-SM70-NEXT:    mov.b32 %r14, %f6;
; CHECK-SM70-NEXT:    bfe.u32 %r15, %r14, 16, 1;
; CHECK-SM70-NEXT:    add.s32 %r16, %r15, %r14;
; CHECK-SM70-NEXT:    add.s32 %r17, %r16, 32767;
; CHECK-SM70-NEXT:    setp.nan.f32 %p3, %f6, %f6;
; CHECK-SM70-NEXT:    or.b32 %r18, %r14, 4194304;
; CHECK-SM70-NEXT:    selp.b32 %r19, %r18, %r17, %p3;
; CHECK-SM70-NEXT:    cvt.u32.u16 %r20, %rs2;
; CHECK-SM70-NEXT:    shl.b32 %r21, %r20, 16;
; CHECK-SM70-NEXT:    mov.b32 %f7, %r21;
; CHECK-SM70-NEXT:    and.b32 %r22, %r19, -65536;
; CHECK-SM70-NEXT:    mov.b32 %f8, %r22;
; CHECK-SM70-NEXT:    add.rn.f32 %f9, %f7, %f8;
; CHECK-SM70-NEXT:    mov.b32 %r23, %f9;
; CHECK-SM70-NEXT:    bfe.u32 %r24, %r23, 16, 1;
; CHECK-SM70-NEXT:    add.s32 %r25, %r24, %r23;
; CHECK-SM70-NEXT:    add.s32 %r26, %r25, 32767;
; CHECK-SM70-NEXT:    setp.nan.f32 %p4, %f9, %f9;
; CHECK-SM70-NEXT:    or.b32 %r27, %r23, 4194304;
; CHECK-SM70-NEXT:    selp.b32 %r28, %r27, %r26, %p4;
; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs3}, %r28; }
; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs3;
; CHECK-SM70-NEXT:    ret;
  %1 = fmul fast bfloat %a, %b
  %2 = fadd fast bfloat %1, %c
  %3 = fcmp nsz ogt bfloat %2, 0.0
  %4 = select nsz i1 %3, bfloat %2, bfloat 0.0
  %5 = fadd bfloat %2, 7.0
  %6 = fadd bfloat %4, %5
  ret bfloat %6
}

; bf16 with the clamp written as `llvm.maxnum.bf16(x, 0.0)`. Both sm_80 runs
; expect fma.rn.relu.bf16. The sm_70 run expects the FMA and the max in f32,
; each followed by the integer bfe/add/selp sequence that narrows the result
; back to bf16.
define bfloat @fma_bf16_expanded_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c)  {
;
;
; CHECK-LABEL: fma_bf16_expanded_maxnum_no_nans(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b16 %rs1, [fma_bf16_expanded_maxnum_no_nans_param_0];
; CHECK-NEXT:    ld.param.b16 %rs2, [fma_bf16_expanded_maxnum_no_nans_param_1];
; CHECK-NEXT:    ld.param.b16 %rs3, [fma_bf16_expanded_maxnum_no_nans_param_2];
; CHECK-NEXT:    fma.rn.relu.bf16 %rs4, %rs1, %rs2, %rs3;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs4;
; CHECK-NEXT:    ret;
;
; CHECK-FTZ-LABEL: fma_bf16_expanded_maxnum_no_nans(
; CHECK-FTZ:       {
; CHECK-FTZ-NEXT:    .reg .b16 %rs<5>;
; CHECK-FTZ-EMPTY:
; CHECK-FTZ-NEXT:  // %bb.0:
; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_bf16_expanded_maxnum_no_nans_param_0];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_bf16_expanded_maxnum_no_nans_param_1];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_bf16_expanded_maxnum_no_nans_param_2];
; CHECK-FTZ-NEXT:    fma.rn.relu.bf16 %rs4, %rs1, %rs2, %rs3;
; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs4;
; CHECK-FTZ-NEXT:    ret;
;
; CHECK-SM70-LABEL: fma_bf16_expanded_maxnum_no_nans(
; CHECK-SM70:       {
; CHECK-SM70-NEXT:    .reg .pred %p<3>;
; CHECK-SM70-NEXT:    .reg .b16 %rs<2>;
; CHECK-SM70-NEXT:    .reg .b32 %r<20>;
; CHECK-SM70-NEXT:    .reg .f32 %f<7>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT:  // %bb.0:
; CHECK-SM70-NEXT:    ld.param.u16 %r1, [fma_bf16_expanded_maxnum_no_nans_param_2];
; CHECK-SM70-NEXT:    shl.b32 %r2, %r1, 16;
; CHECK-SM70-NEXT:    mov.b32 %f1, %r2;
; CHECK-SM70-NEXT:    ld.param.u16 %r3, [fma_bf16_expanded_maxnum_no_nans_param_1];
; CHECK-SM70-NEXT:    shl.b32 %r4, %r3, 16;
; CHECK-SM70-NEXT:    mov.b32 %f2, %r4;
; CHECK-SM70-NEXT:    ld.param.u16 %r5, [fma_bf16_expanded_maxnum_no_nans_param_0];
; CHECK-SM70-NEXT:    shl.b32 %r6, %r5, 16;
; CHECK-SM70-NEXT:    mov.b32 %f3, %r6;
; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
; CHECK-SM70-NEXT:    mov.b32 %r7, %f4;
; CHECK-SM70-NEXT:    bfe.u32 %r8, %r7, 16, 1;
; CHECK-SM70-NEXT:    add.s32 %r9, %r8, %r7;
; CHECK-SM70-NEXT:    add.s32 %r10, %r9, 32767;
; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
; CHECK-SM70-NEXT:    or.b32 %r11, %r7, 4194304;
; CHECK-SM70-NEXT:    selp.b32 %r12, %r11, %r10, %p1;
; CHECK-SM70-NEXT:    and.b32 %r13, %r12, -65536;
; CHECK-SM70-NEXT:    mov.b32 %f5, %r13;
; CHECK-SM70-NEXT:    max.f32 %f6, %f5, 0f00000000;
; CHECK-SM70-NEXT:    mov.b32 %r14, %f6;
; CHECK-SM70-NEXT:    bfe.u32 %r15, %r14, 16, 1;
; CHECK-SM70-NEXT:    add.s32 %r16, %r15, %r14;
; CHECK-SM70-NEXT:    add.s32 %r17, %r16, 32767;
; CHECK-SM70-NEXT:    setp.nan.f32 %p2, %f6, %f6;
; CHECK-SM70-NEXT:    or.b32 %r18, %r14, 4194304;
; CHECK-SM70-NEXT:    selp.b32 %r19, %r18, %r17, %p2;
; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r19; }
; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs1;
; CHECK-SM70-NEXT:    ret;
  %1 = fmul fast bfloat %a, %b
  %2 = fadd fast bfloat %1, %c
  %3 = call nsz bfloat @llvm.maxnum.bf16(bfloat %2, bfloat 0.0)
  ret bfloat %3
}

; Packed <2 x half> variant of the compare/select clamp. The sm_80 runs expect
; fma.rn.relu.f16x2 (fma.rn.ftz.relu.f16x2 under FTZ). The sm_70 run expects
; fma.rn.f16x2, a setp.gt.f16x2 producing one predicate per lane, and a
; selp.b16 on each unpacked lane before the result is repacked.
define <2 x half> @fma_f16x2_expanded_no_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c)  {
;
;
; CHECK-LABEL: fma_f16x2_expanded_no_nans(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_no_nans_param_2];
; CHECK-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_no_nans_param_1];
; CHECK-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_no_nans_param_0];
; CHECK-NEXT:    fma.rn.relu.f16x2 %r4, %r3, %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
;
; CHECK-FTZ-LABEL: fma_f16x2_expanded_no_nans(
; CHECK-FTZ:       {
; CHECK-FTZ-NEXT:    .reg .b32 %r<5>;
; CHECK-FTZ-EMPTY:
; CHECK-FTZ-NEXT:  // %bb.0:
; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_no_nans_param_2];
; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_no_nans_param_1];
; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_no_nans_param_0];
; CHECK-FTZ-NEXT:    fma.rn.ftz.relu.f16x2 %r4, %r3, %r2, %r1;
; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-FTZ-NEXT:    ret;
;
; CHECK-SM70-LABEL: fma_f16x2_expanded_no_nans(
; CHECK-SM70:       {
; CHECK-SM70-NEXT:    .reg .pred %p<3>;
; CHECK-SM70-NEXT:    .reg .b16 %rs<5>;
; CHECK-SM70-NEXT:    .reg .b32 %r<7>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT:  // %bb.0:
; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_no_nans_param_2];
; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_no_nans_param_1];
; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_no_nans_param_0];
; CHECK-SM70-NEXT:    fma.rn.f16x2 %r4, %r3, %r2, %r1;
; CHECK-SM70-NEXT:    mov.b32 %r5, 0;
; CHECK-SM70-NEXT:    setp.gt.f16x2 %p1|%p2, %r4, %r5;
; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r4;
; CHECK-SM70-NEXT:    selp.b16 %rs3, %rs2, 0x0000, %p2;
; CHECK-SM70-NEXT:    selp.b16 %rs4, %rs1, 0x0000, %p1;
; CHECK-SM70-NEXT:    mov.b32 %r6, {%rs4, %rs3};
; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r6;
; CHECK-SM70-NEXT:    ret;
  %1 = fmul fast <2 x half> %a, %b
  %2 = fadd fast <2 x half> %1, %c
  %3 = fcmp nsz ogt <2 x half> %2, <half 0.0, half 0.0>
  %4 = select nsz <2 x i1> %3, <2 x half> %2, <2 x half> <half 0.0, half 0.0>
  ret <2 x half> %4
}

4688f8016feSHugh Delaney; FMA relu shouldn't be selected if the FMA operation has multiple uses
; Packed <2 x half> ReLU pattern (fcmp ogt + select against zero) whose
; multiply-add result %2 has three users: the compare, the select, and the
; extra fadd with 7.0 (%5). Expected output per llc invocation, as pinned by
; the assertions below:
;   - sm_80:       plain fma.rn.f16x2 followed by a separate max.f16x2
;   - sm_80 + FTZ: the same shape using the .ftz variants
;   - sm_70:       fma.rn.f16x2, then setp.gt.f16x2 and per-lane selp.b16
; None of the three expected sequences contains a .relu form of fma.
; The immediate 1191200512 is 0x47004700, i.e. two packed half 7.0 (0x4700).
4698f8016feSHugh Delaneydefine <2 x half> @fma_f16x2_expanded_no_nans_multiple_uses_of_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c)  {
4708f8016feSHugh Delaney;
4718f8016feSHugh Delaney;
4728f8016feSHugh Delaney; CHECK-LABEL: fma_f16x2_expanded_no_nans_multiple_uses_of_fma(
4738f8016feSHugh Delaney; CHECK:       {
4748f8016feSHugh Delaney; CHECK-NEXT:    .reg .b32 %r<10>;
4758f8016feSHugh Delaney; CHECK-EMPTY:
4768f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
4778f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_2];
4788f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_1];
4798f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_0];
4808f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.f16x2 %r4, %r3, %r2, %r1;
4818f8016feSHugh Delaney; CHECK-NEXT:    mov.b32 %r5, 0;
4828f8016feSHugh Delaney; CHECK-NEXT:    max.f16x2 %r6, %r4, %r5;
4838f8016feSHugh Delaney; CHECK-NEXT:    mov.b32 %r7, 1191200512;
4848f8016feSHugh Delaney; CHECK-NEXT:    add.rn.f16x2 %r8, %r4, %r7;
4858f8016feSHugh Delaney; CHECK-NEXT:    add.rn.f16x2 %r9, %r6, %r8;
4868f8016feSHugh Delaney; CHECK-NEXT:    st.param.b32 [func_retval0], %r9;
4878f8016feSHugh Delaney; CHECK-NEXT:    ret;
4888f8016feSHugh Delaney;
4898f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_f16x2_expanded_no_nans_multiple_uses_of_fma(
4908f8016feSHugh Delaney; CHECK-FTZ:       {
4918f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<10>;
4928f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
4938f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
4948f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_2];
4958f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_1];
4968f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_0];
4978f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.ftz.f16x2 %r4, %r3, %r2, %r1;
4988f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %r5, 0;
4998f8016feSHugh Delaney; CHECK-FTZ-NEXT:    max.ftz.f16x2 %r6, %r4, %r5;
5008f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %r7, 1191200512;
5018f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.rn.ftz.f16x2 %r8, %r4, %r7;
5028f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.rn.ftz.f16x2 %r9, %r6, %r8;
5038f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r9;
5048f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
5058f8016feSHugh Delaney;
5068f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_f16x2_expanded_no_nans_multiple_uses_of_fma(
5078f8016feSHugh Delaney; CHECK-SM70:       {
5088f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<3>;
5098f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b16 %rs<5>;
5108f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b32 %r<10>;
5118f8016feSHugh Delaney; CHECK-SM70-EMPTY:
5128f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
5138f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_2];
5148f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_1];
5158f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_0];
5168f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f16x2 %r4, %r3, %r2, %r1;
5178f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r5, 0;
5188f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f16x2 %p1|%p2, %r4, %r5;
5198f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r4;
5208f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b16 %rs3, %rs2, 0x0000, %p2;
5218f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b16 %rs4, %rs1, 0x0000, %p1;
5228f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r6, {%rs4, %rs3};
5238f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r7, 1191200512;
5248f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.rn.f16x2 %r8, %r4, %r7;
5258f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.rn.f16x2 %r9, %r6, %r8;
5268f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r9;
5278f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
5288f8016feSHugh Delaney  %1 = fmul fast <2 x half> %a, %b
5298f8016feSHugh Delaney  %2 = fadd fast <2 x half> %1, %c
5308f8016feSHugh Delaney  %3 = fcmp nsz ogt <2 x half> %2, <half 0.0, half 0.0>
5318f8016feSHugh Delaney  %4 = select nsz <2 x i1> %3, <2 x half> %2, <2 x half> <half 0.0, half 0.0>
5328f8016feSHugh Delaney  %5 = fadd <2 x half> %2, <half 7.0, half 7.0>
5338f8016feSHugh Delaney  %6 = fadd <2 x half> %4, %5
5348f8016feSHugh Delaney  ret <2 x half> %6
5358f8016feSHugh Delaney}
5368f8016feSHugh Delaney
; ReLU expressed as llvm.maxnum(fmul+fadd, 0) on <2 x half>; the multiply-add
; result %2 is used only by the maxnum call. Expected output per llc
; invocation, as pinned by the assertions below:
;   - sm_80:       fma.rn.relu.f16x2
;   - sm_80 + FTZ: fma.rn.ftz.relu.f16x2
;   - sm_70:       fma.rn.f16x2, then each lane is converted to f32, clamped
;                  with max.f32 against 0f00000000, converted back to f16 and
;                  repacked
; The intrinsic is spelled with its canonical overload suffix (v2f16 for
; <2 x half>) so the test does not depend on the IR reader repairing a
; non-standard intrinsic name.
5378f8016feSHugh Delaneydefine <2 x half> @fma_f16x2_expanded_maxnum_no_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c)  {
5388f8016feSHugh Delaney; CHECK-LABEL: fma_f16x2_expanded_maxnum_no_nans(
5398f8016feSHugh Delaney; CHECK:       {
5408f8016feSHugh Delaney; CHECK-NEXT:    .reg .b32 %r<5>;
5418f8016feSHugh Delaney; CHECK-EMPTY:
5428f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
5438f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_maxnum_no_nans_param_2];
5448f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_maxnum_no_nans_param_1];
5458f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_maxnum_no_nans_param_0];
5468f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.relu.f16x2 %r4, %r3, %r2, %r1;
5478f8016feSHugh Delaney; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
5488f8016feSHugh Delaney; CHECK-NEXT:    ret;
5498f8016feSHugh Delaney;
5508f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_f16x2_expanded_maxnum_no_nans(
5518f8016feSHugh Delaney; CHECK-FTZ:       {
5528f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<5>;
5538f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
5548f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
5558f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_maxnum_no_nans_param_2];
5568f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_maxnum_no_nans_param_1];
5578f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_maxnum_no_nans_param_0];
5588f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.ftz.relu.f16x2 %r4, %r3, %r2, %r1;
5598f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r4;
5608f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
5618f8016feSHugh Delaney;
5628f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_f16x2_expanded_maxnum_no_nans(
5638f8016feSHugh Delaney; CHECK-SM70:       {
5648f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b16 %rs<5>;
5658f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b32 %r<6>;
5668f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<5>;
5678f8016feSHugh Delaney; CHECK-SM70-EMPTY:
5688f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
5698f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_maxnum_no_nans_param_2];
5708f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_maxnum_no_nans_param_1];
5718f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_maxnum_no_nans_param_0];
5728f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f16x2 %r4, %r3, %r2, %r1;
5738f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r4;
5748f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.f32.f16 %f1, %rs2;
5758f8016feSHugh Delaney; CHECK-SM70-NEXT:    max.f32 %f2, %f1, 0f00000000;
5768f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.rn.f16.f32 %rs3, %f2;
5778f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.f32.f16 %f3, %rs1;
5788f8016feSHugh Delaney; CHECK-SM70-NEXT:    max.f32 %f4, %f3, 0f00000000;
5798f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.rn.f16.f32 %rs4, %f4;
5808f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r5, {%rs4, %rs3};
5818f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r5;
5828f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
5838f8016feSHugh Delaney  %1 = fmul fast <2 x half> %a, %b
5848f8016feSHugh Delaney  %2 = fadd fast <2 x half> %1, %c
5858f8016feSHugh Delaney  %3 = call nsz <2 x half> @llvm.maxnum.v2f16(<2 x half> %2, <2 x half> <half 0.0, half 0.0>)
5868f8016feSHugh Delaney  ret <2 x half> %3
5878f8016feSHugh Delaney}
5888f8016feSHugh Delaney
; Packed <2 x bfloat> ReLU written as fcmp ogt + select against zero; the
; multiply-add result %2 is used only by that compare/select pair. Expected
; output per llc invocation, as pinned by the assertions below:
;   - sm_80 and sm_80 + FTZ: fma.rn.relu.bf16x2 in both cases (the bf16 form
;     carries no .ftz modifier)
;   - sm_70: no bf16 instructions appear; each lane is widened to an f32 bit
;     pattern (shl by 16), combined with fma.rn.f32, narrowed back through an
;     integer bfe/add/or/selp sequence (presumably round-to-nearest-even with
;     NaN quieting via 4194304 = 0x400000 -- confirm against the NVPTX
;     lowering), and finally selected against zero with setp.gt.f32 + selp.b16
5898f8016feSHugh Delaneydefine <2 x bfloat> @fma_bf16x2_expanded_no_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c)  {
5908f8016feSHugh Delaney; CHECK-LABEL: fma_bf16x2_expanded_no_nans(
5918f8016feSHugh Delaney; CHECK:       {
5928f8016feSHugh Delaney; CHECK-NEXT:    .reg .b32 %r<5>;
5938f8016feSHugh Delaney; CHECK-EMPTY:
5948f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
5958f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_no_nans_param_2];
5968f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_no_nans_param_1];
5978f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_no_nans_param_0];
5988f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.relu.bf16x2 %r4, %r3, %r2, %r1;
5998f8016feSHugh Delaney; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
6008f8016feSHugh Delaney; CHECK-NEXT:    ret;
6018f8016feSHugh Delaney;
6028f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_bf16x2_expanded_no_nans(
6038f8016feSHugh Delaney; CHECK-FTZ:       {
6048f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<5>;
6058f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
6068f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
6078f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_no_nans_param_2];
6088f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_no_nans_param_1];
6098f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_no_nans_param_0];
6108f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.relu.bf16x2 %r4, %r3, %r2, %r1;
6118f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r4;
6128f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
6138f8016feSHugh Delaney;
6148f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_bf16x2_expanded_no_nans(
6158f8016feSHugh Delaney; CHECK-SM70:       {
6168f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<5>;
617310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b16 %rs<11>;
6188f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b32 %r<31>;
6198f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<11>;
6208f8016feSHugh Delaney; CHECK-SM70-EMPTY:
6218f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
6228f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_no_nans_param_0];
6238f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_no_nans_param_1];
6248f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_no_nans_param_2];
6258f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r3;
6268f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r4, %rs1;
6278f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r5, %r4, 16;
6288f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f1, %r5;
629310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs3, %rs4}, %r2;
630310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r6, %rs3;
6318f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r7, %r6, 16;
6328f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f2, %r7;
633310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs5, %rs6}, %r1;
634310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r8, %rs5;
6358f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r9, %r8, 16;
6368f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f3, %r9;
6378f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
6388f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r10, %f4;
6398f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r11, %r10, 16, 1;
6408f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r12, %r11, %r10;
6418f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r13, %r12, 32767;
6428f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
6438f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r14, %r10, 4194304;
6448f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r15, %r14, %r13, %p1;
645310e7987SAlex MacLean; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r15; }
6468f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r16, %rs2;
6478f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r17, %r16, 16;
6488f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f5, %r17;
649310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r18, %rs4;
6508f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r19, %r18, 16;
6518f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f6, %r19;
652310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r20, %rs6;
6538f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r21, %r20, 16;
6548f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f7, %r21;
6558f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f8, %f7, %f6, %f5;
6568f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r22, %f8;
6578f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r23, %r22, 16, 1;
6588f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r24, %r23, %r22;
6598f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r25, %r24, 32767;
6608f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p2, %f8, %f8;
6618f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r26, %r22, 4194304;
6628f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r27, %r26, %r25, %p2;
663310e7987SAlex MacLean; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs8}, %r27; }
6648f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r28, %r15, -65536;
6658f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f9, %r28;
6668f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f32 %p3, %f9, 0f00000000;
6678f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r29, %r27, -65536;
6688f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f10, %r29;
6698f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f32 %p4, %f10, 0f00000000;
670310e7987SAlex MacLean; CHECK-SM70-NEXT:    selp.b16 %rs9, %rs8, 0x0000, %p4;
671310e7987SAlex MacLean; CHECK-SM70-NEXT:    selp.b16 %rs10, %rs7, 0x0000, %p3;
672310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 %r30, {%rs10, %rs9};
6738f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r30;
6748f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
6758f8016feSHugh Delaney  %1 = fmul fast <2 x bfloat> %a, %b
6768f8016feSHugh Delaney  %2 = fadd fast <2 x bfloat> %1, %c
6778f8016feSHugh Delaney  %3 = fcmp nsz ogt <2 x bfloat> %2, <bfloat 0.0, bfloat 0.0>
6788f8016feSHugh Delaney  %4 = select nsz <2 x i1> %3, <2 x bfloat> %2, <2 x bfloat> <bfloat 0.0, bfloat 0.0>
6798f8016feSHugh Delaney  ret <2 x bfloat> %4
6808f8016feSHugh Delaney}
6818f8016feSHugh Delaney
6828f8016feSHugh Delaney; FMA relu shouldn't be selected if the FMA operation has multiple uses
; bfloat counterpart of the multiple-uses case: the multiply-add result %2
; feeds the compare, the select and the extra fadd with 7.0 (%5). Expected
; output per llc invocation, as pinned by the assertions below:
;   - sm_80: fma.rn.bf16x2 + max.bf16x2 (no .relu); the two fadds are emitted
;     as fma.rn.bf16x2 with multiplier 1065369472 (0x3F803F80, packed bf16
;     1.0), the first of them with addend 1088438496 (0x40E040E0, packed
;     bf16 7.0)
;   - sm_80 + FTZ: fma.rn.bf16x2 + max.bf16x2 (no .relu), but the fadds are
;     done per lane in f32 with add.rn.ftz.f32 (0f40E00000 is 7.0) and
;     converted back with cvt.rn.bf16.f32 / cvt.rn.bf16x2.f32
;   - sm_70: no bf16 instructions appear; the arithmetic is carried out in
;     f32 with integer bfe/add/or/selp narrowing sequences, and the two
;     result halves are packed with prmt.b32
6838f8016feSHugh Delaneydefine <2 x bfloat> @fma_bf16x2_expanded_no_nans_multiple_uses_of_fma(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c)  {
6848f8016feSHugh Delaney; CHECK-LABEL: fma_bf16x2_expanded_no_nans_multiple_uses_of_fma(
6858f8016feSHugh Delaney; CHECK:       {
686*5e5fd0e6Speterbell10; CHECK-NEXT:    .reg .b32 %r<11>;
6878f8016feSHugh Delaney; CHECK-EMPTY:
6888f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
6898f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_2];
6908f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_1];
6918f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_0];
6928f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.bf16x2 %r4, %r3, %r2, %r1;
6938f8016feSHugh Delaney; CHECK-NEXT:    mov.b32 %r5, 0;
6948f8016feSHugh Delaney; CHECK-NEXT:    max.bf16x2 %r6, %r4, %r5;
695*5e5fd0e6Speterbell10; CHECK-NEXT:    mov.b32 %r7, 1065369472;
696*5e5fd0e6Speterbell10; CHECK-NEXT:    mov.b32 %r8, 1088438496;
697*5e5fd0e6Speterbell10; CHECK-NEXT:    fma.rn.bf16x2 %r9, %r4, %r7, %r8;
698*5e5fd0e6Speterbell10; CHECK-NEXT:    fma.rn.bf16x2 %r10, %r6, %r7, %r9;
699*5e5fd0e6Speterbell10; CHECK-NEXT:    st.param.b32 [func_retval0], %r10;
7008f8016feSHugh Delaney; CHECK-NEXT:    ret;
7018f8016feSHugh Delaney;
7028f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_bf16x2_expanded_no_nans_multiple_uses_of_fma(
7038f8016feSHugh Delaney; CHECK-FTZ:       {
704310e7987SAlex MacLean; CHECK-FTZ-NEXT:    .reg .b16 %rs<7>;
7058f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<20>;
7068f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .f32 %f<11>;
7078f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
7088f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
7098f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_2];
7108f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_1];
7118f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_0];
7128f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.bf16x2 %r4, %r3, %r2, %r1;
7138f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %r5, 0;
7148f8016feSHugh Delaney; CHECK-FTZ-NEXT:    max.bf16x2 %r6, %r4, %r5;
7158f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 {%rs1, %rs2}, %r4;
7164b24ab4bSAlex MacLean; CHECK-FTZ-NEXT:    cvt.u32.u16 %r7, %rs2;
7178f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r8, %r7, 16;
7188f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f1, %r8;
7198f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.rn.ftz.f32 %f2, %f1, 0f40E00000;
720310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.rn.bf16.f32 %rs3, %f2;
7214b24ab4bSAlex MacLean; CHECK-FTZ-NEXT:    cvt.u32.u16 %r9, %rs1;
7228f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r10, %r9, 16;
7238f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f3, %r10;
7248f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.rn.ftz.f32 %f4, %f3, 0f40E00000;
725310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.rn.bf16.f32 %rs4, %f4;
726310e7987SAlex MacLean; CHECK-FTZ-NEXT:    mov.b32 {%rs5, %rs6}, %r6;
727310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.u32.u16 %r11, %rs5;
7288f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r12, %r11, 16;
7298f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f5, %r12;
730310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.u32.u16 %r13, %rs4;
7318f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r14, %r13, 16;
7328f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f6, %r14;
7338f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.rn.ftz.f32 %f7, %f5, %f6;
734310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.u32.u16 %r15, %rs6;
7358f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r16, %r15, 16;
7368f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f8, %r16;
737310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.u32.u16 %r17, %rs3;
7388f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r18, %r17, 16;
7398f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f9, %r18;
7408f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.rn.ftz.f32 %f10, %f8, %f9;
7414b24ab4bSAlex MacLean; CHECK-FTZ-NEXT:    cvt.rn.bf16x2.f32 %r19, %f10, %f7;
7428f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r19;
7438f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
7448f8016feSHugh Delaney;
7458f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_bf16x2_expanded_no_nans_multiple_uses_of_fma(
7468f8016feSHugh Delaney; CHECK-SM70:       {
7478f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<9>;
748310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b16 %rs<11>;
749310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b32 %r<61>;
7508f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<19>;
7518f8016feSHugh Delaney; CHECK-SM70-EMPTY:
7528f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
7538f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_0];
7548f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_1];
7558f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_2];
7568f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r3;
7578f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r4, %rs2;
7588f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r5, %r4, 16;
7598f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f1, %r5;
760310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs3, %rs4}, %r2;
761310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r6, %rs4;
7628f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r7, %r6, 16;
7638f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f2, %r7;
764310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs5, %rs6}, %r1;
765310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r8, %rs6;
7668f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r9, %r8, 16;
7678f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f3, %r9;
7688f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
7698f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r10, %f4;
7708f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r11, %r10, 16, 1;
7718f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r12, %r11, %r10;
7728f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r13, %r12, 32767;
7738f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
7748f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r14, %r10, 4194304;
7758f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r15, %r14, %r13, %p1;
776310e7987SAlex MacLean; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r15; }
7778f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r16, %rs1;
7788f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r17, %r16, 16;
7798f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f5, %r17;
780310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r18, %rs3;
7818f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r19, %r18, 16;
7828f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f6, %r19;
783310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r20, %rs5;
7848f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r21, %r20, 16;
7858f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f7, %r21;
7868f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f8, %f7, %f6, %f5;
7878f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r22, %f8;
7888f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r23, %r22, 16, 1;
7898f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r24, %r23, %r22;
7908f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r25, %r24, 32767;
7918f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p2, %f8, %f8;
7928f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r26, %r22, 4194304;
7938f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r27, %r26, %r25, %p2;
794310e7987SAlex MacLean; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs8}, %r27; }
7958f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r28, %r15, -65536;
7968f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f9, %r28;
7978f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f32 %p3, %f9, 0f00000000;
7988f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r29, %r27, -65536;
7998f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f10, %r29;
8008f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f32 %p4, %f10, 0f00000000;
801310e7987SAlex MacLean; CHECK-SM70-NEXT:    selp.b16 %rs9, %rs8, 0x0000, %p4;
802310e7987SAlex MacLean; CHECK-SM70-NEXT:    selp.b16 %rs10, %rs7, 0x0000, %p3;
8038f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.rn.f32 %f11, %f10, 0f40E00000;
8048f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r30, %f11;
8058f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r31, %r30, 16, 1;
8068f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r32, %r31, %r30;
8078f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r33, %r32, 32767;
8088f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p5, %f11, %f11;
8098f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r34, %r30, 4194304;
8108f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r35, %r34, %r33, %p5;
8118f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.rn.f32 %f12, %f9, 0f40E00000;
8128f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r36, %f12;
8138f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r37, %r36, 16, 1;
8148f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r38, %r37, %r36;
8158f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r39, %r38, 32767;
8168f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p6, %f12, %f12;
8178f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r40, %r36, 4194304;
8188f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r41, %r40, %r39, %p6;
819310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r42, %rs10;
8208f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r43, %r42, 16;
8218f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f13, %r43;
8228f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r44, %r41, -65536;
8238f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f14, %r44;
8248f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.rn.f32 %f15, %f13, %f14;
8258f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r45, %f15;
8268f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r46, %r45, 16, 1;
8278f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r47, %r46, %r45;
8288f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r48, %r47, 32767;
8298f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p7, %f15, %f15;
8308f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r49, %r45, 4194304;
8318f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r50, %r49, %r48, %p7;
832310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r51, %rs9;
8338f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r52, %r51, 16;
8348f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f16, %r52;
8358f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r53, %r35, -65536;
8368f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f17, %r53;
8378f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.rn.f32 %f18, %f16, %f17;
8388f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r54, %f18;
8398f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r55, %r54, 16, 1;
8408f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r56, %r55, %r54;
8418f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r57, %r56, 32767;
8428f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p8, %f18, %f18;
8438f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r58, %r54, 4194304;
8448f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r59, %r58, %r57, %p8;
845a1f5fe8cSFraser Cormack; CHECK-SM70-NEXT:    prmt.b32 %r60, %r59, %r50, 0x7632U;
8468f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r60;
8478f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
8488f8016feSHugh Delaney  %1 = fmul fast <2 x bfloat> %a, %b
8498f8016feSHugh Delaney  %2 = fadd fast <2 x bfloat> %1, %c
8508f8016feSHugh Delaney  %3 = fcmp nsz ogt <2 x bfloat> %2, <bfloat 0.0, bfloat 0.0>
8518f8016feSHugh Delaney  %4 = select nsz <2 x i1> %3, <2 x bfloat> %2, <2 x bfloat> <bfloat 0.0, bfloat 0.0>
8528f8016feSHugh Delaney  %5 = fadd <2 x bfloat> %2, <bfloat 7.0, bfloat 7.0>
8538f8016feSHugh Delaney  %6 = fadd <2 x bfloat> %4, %5
8548f8016feSHugh Delaney  ret <2 x bfloat> %6
8558f8016feSHugh Delaney}
8568f8016feSHugh Delaney
8578f8016feSHugh Delaneydefine <2 x bfloat> @fma_bf16x2_expanded_maxnum_no_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c)  {
8588f8016feSHugh Delaney; CHECK-LABEL: fma_bf16x2_expanded_maxnum_no_nans(
8598f8016feSHugh Delaney; CHECK:       {
8608f8016feSHugh Delaney; CHECK-NEXT:    .reg .b32 %r<5>;
8618f8016feSHugh Delaney; CHECK-EMPTY:
8628f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
8638f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_maxnum_no_nans_param_2];
8648f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_maxnum_no_nans_param_1];
8658f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_maxnum_no_nans_param_0];
8668f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.relu.bf16x2 %r4, %r3, %r2, %r1;
8678f8016feSHugh Delaney; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
8688f8016feSHugh Delaney; CHECK-NEXT:    ret;
8698f8016feSHugh Delaney;
8708f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_bf16x2_expanded_maxnum_no_nans(
8718f8016feSHugh Delaney; CHECK-FTZ:       {
8728f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<5>;
8738f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
8748f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
8758f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_maxnum_no_nans_param_2];
8768f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_maxnum_no_nans_param_1];
8778f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_maxnum_no_nans_param_0];
8788f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.relu.bf16x2 %r4, %r3, %r2, %r1;
8798f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r4;
8808f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
8818f8016feSHugh Delaney;
8828f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_bf16x2_expanded_maxnum_no_nans(
8838f8016feSHugh Delaney; CHECK-SM70:       {
8848f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<5>;
885310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b16 %rs<7>;
886310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b32 %r<43>;
8878f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<13>;
8888f8016feSHugh Delaney; CHECK-SM70-EMPTY:
8898f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
8908f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_maxnum_no_nans_param_0];
8918f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_maxnum_no_nans_param_1];
8928f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_maxnum_no_nans_param_2];
8938f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r3;
8948f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r4, %rs1;
8958f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r5, %r4, 16;
8968f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f1, %r5;
897310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs3, %rs4}, %r2;
898310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r6, %rs3;
8998f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r7, %r6, 16;
9008f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f2, %r7;
901310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs5, %rs6}, %r1;
902310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r8, %rs5;
9038f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r9, %r8, 16;
9048f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f3, %r9;
9058f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
9068f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r10, %f4;
9078f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r11, %r10, 16, 1;
9088f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r12, %r11, %r10;
9098f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r13, %r12, 32767;
9108f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
9118f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r14, %r10, 4194304;
9128f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r15, %r14, %r13, %p1;
9138f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r16, %rs2;
9148f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r17, %r16, 16;
9158f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f5, %r17;
916310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r18, %rs4;
9178f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r19, %r18, 16;
9188f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f6, %r19;
919310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r20, %rs6;
9208f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r21, %r20, 16;
9218f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f7, %r21;
9228f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f8, %f7, %f6, %f5;
9238f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r22, %f8;
9248f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r23, %r22, 16, 1;
9258f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r24, %r23, %r22;
9268f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r25, %r24, 32767;
9278f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p2, %f8, %f8;
9288f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r26, %r22, 4194304;
9298f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r27, %r26, %r25, %p2;
9308f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r28, %r27, -65536;
9318f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f9, %r28;
9328f8016feSHugh Delaney; CHECK-SM70-NEXT:    max.f32 %f10, %f9, 0f00000000;
9338f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r29, %f10;
9348f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r30, %r29, 16, 1;
9358f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r31, %r30, %r29;
9368f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r32, %r31, 32767;
9378f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p3, %f10, %f10;
9388f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r33, %r29, 4194304;
9398f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r34, %r33, %r32, %p3;
9408f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r35, %r15, -65536;
9418f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f11, %r35;
9428f8016feSHugh Delaney; CHECK-SM70-NEXT:    max.f32 %f12, %f11, 0f00000000;
9438f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r36, %f12;
9448f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r37, %r36, 16, 1;
9458f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r38, %r37, %r36;
9468f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r39, %r38, 32767;
9478f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p4, %f12, %f12;
9488f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r40, %r36, 4194304;
9498f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r41, %r40, %r39, %p4;
950a1f5fe8cSFraser Cormack; CHECK-SM70-NEXT:    prmt.b32 %r42, %r41, %r34, 0x7632U;
9518f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r42;
9528f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
9538f8016feSHugh Delaney  %1 = fmul fast <2 x bfloat> %a, %b
9548f8016feSHugh Delaney  %2 = fadd fast <2 x bfloat> %1, %c
9558f8016feSHugh Delaney  %3 = call nsz <2 x bfloat> @llvm.maxnum.bf16x2(<2 x bfloat> %2, <2 x bfloat> <bfloat 0.0, bfloat 0.0>)
9568f8016feSHugh Delaney  ret <2 x bfloat> %3
9578f8016feSHugh Delaney}
9588f8016feSHugh Delaney
; fma_f16_no_nans: scalar half ReLU written as `fcmp ogt` + `select` against
; 0.0, applied to the result of an `nnan` llvm.fma.f16 call.
; Expected lowering per configuration:
;   * default sm_80          - one fused fma.rn.relu.f16.
;   * sm_80 with f32 denormal mode preserve-sign (FTZ) - fma.rn.ftz.relu.f16.
;   * sm_70                  - no relu form: fma.rn.f16, then setp.gt.f16 and
;                              selp.b16 implement the compare/select.
9598f8016feSHugh Delaneydefine half @fma_f16_no_nans(half %a, half %b, half %c)  {
9608f8016feSHugh Delaney; CHECK-LABEL: fma_f16_no_nans(
9618f8016feSHugh Delaney; CHECK:       {
9628f8016feSHugh Delaney; CHECK-NEXT:    .reg .b16 %rs<5>;
9638f8016feSHugh Delaney; CHECK-EMPTY:
9648f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
9658f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs1, [fma_f16_no_nans_param_0];
9668f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs2, [fma_f16_no_nans_param_1];
9678f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs3, [fma_f16_no_nans_param_2];
9688f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.relu.f16 %rs4, %rs1, %rs2, %rs3;
9698f8016feSHugh Delaney; CHECK-NEXT:    st.param.b16 [func_retval0], %rs4;
9708f8016feSHugh Delaney; CHECK-NEXT:    ret;
9718f8016feSHugh Delaney;
9728f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_f16_no_nans(
9738f8016feSHugh Delaney; CHECK-FTZ:       {
9748f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b16 %rs<5>;
9758f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
9768f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
9778f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_f16_no_nans_param_0];
9788f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_f16_no_nans_param_1];
9798f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_f16_no_nans_param_2];
9808f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.ftz.relu.f16 %rs4, %rs1, %rs2, %rs3;
9818f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs4;
9828f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
9838f8016feSHugh Delaney;
9848f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_f16_no_nans(
9858f8016feSHugh Delaney; CHECK-SM70:       {
9868f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<2>;
9878f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b16 %rs<7>;
9888f8016feSHugh Delaney; CHECK-SM70-EMPTY:
9898f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
9908f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b16 %rs1, [fma_f16_no_nans_param_0];
9918f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b16 %rs2, [fma_f16_no_nans_param_1];
9928f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b16 %rs3, [fma_f16_no_nans_param_2];
9938f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
9948f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b16 %rs5, 0x0000;
9958f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f16 %p1, %rs4, %rs5;
9968f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b16 %rs6, %rs4, 0x0000, %p1;
9978f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs6;
9988f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
  ; Input pattern: the fma carries `nnan`; the compare and select carry `nsz`.
9998f8016feSHugh Delaney  %1 = call nnan half @llvm.fma.f16(half %a, half %b, half %c)
10008f8016feSHugh Delaney  %2 = fcmp nsz ogt half %1, 0.0
10018f8016feSHugh Delaney  %3 = select nsz i1 %2, half %1, half 0.0
10028f8016feSHugh Delaney  ret half %3
10038f8016feSHugh Delaney}
10048f8016feSHugh Delaney
10058f8016feSHugh Delaney; FMA relu shouldn't be selected if the FMA operation has multiple uses
; The fma result %1 is consumed by the compare/select pair and by both fadds.
; All three configurations expect a plain (non-relu) fma followed by two adds;
; the FTZ configuration only differs by the .ftz modifier. 0x4700 is the half
; encoding of the 7.0 addend.
; NOTE(review): the select result %3 is not used by the returned value %5, and
; the expected output contains no compare/select/max at all. The relu pattern
; therefore probably never reaches instruction selection here - consider
; feeding %3 into the first fadd and regenerating the assertions so the
; multiple-use restriction is actually exercised. TODO confirm with llc.
10068f8016feSHugh Delaneydefine half @fma_f16_no_nans_multiple_uses_of_fma(half %a, half %b, half %c)  {
10078f8016feSHugh Delaney; CHECK-LABEL: fma_f16_no_nans_multiple_uses_of_fma(
10088f8016feSHugh Delaney; CHECK:       {
10098f8016feSHugh Delaney; CHECK-NEXT:    .reg .b16 %rs<8>;
10108f8016feSHugh Delaney; CHECK-EMPTY:
10118f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
10128f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs1, [fma_f16_no_nans_multiple_uses_of_fma_param_0];
10138f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs2, [fma_f16_no_nans_multiple_uses_of_fma_param_1];
10148f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs3, [fma_f16_no_nans_multiple_uses_of_fma_param_2];
10158f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
10168f8016feSHugh Delaney; CHECK-NEXT:    mov.b16 %rs5, 0x4700;
10178f8016feSHugh Delaney; CHECK-NEXT:    add.rn.f16 %rs6, %rs4, %rs5;
10188f8016feSHugh Delaney; CHECK-NEXT:    add.rn.f16 %rs7, %rs6, %rs4;
10198f8016feSHugh Delaney; CHECK-NEXT:    st.param.b16 [func_retval0], %rs7;
10208f8016feSHugh Delaney; CHECK-NEXT:    ret;
10218f8016feSHugh Delaney;
10228f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_f16_no_nans_multiple_uses_of_fma(
10238f8016feSHugh Delaney; CHECK-FTZ:       {
10248f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b16 %rs<8>;
10258f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
10268f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
10278f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_f16_no_nans_multiple_uses_of_fma_param_0];
10288f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_f16_no_nans_multiple_uses_of_fma_param_1];
10298f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_f16_no_nans_multiple_uses_of_fma_param_2];
10308f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.ftz.f16 %rs4, %rs1, %rs2, %rs3;
10318f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b16 %rs5, 0x4700;
10328f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.rn.ftz.f16 %rs6, %rs4, %rs5;
10338f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.rn.ftz.f16 %rs7, %rs6, %rs4;
10348f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs7;
10358f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
10368f8016feSHugh Delaney;
10378f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_f16_no_nans_multiple_uses_of_fma(
10388f8016feSHugh Delaney; CHECK-SM70:       {
10398f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b16 %rs<8>;
10408f8016feSHugh Delaney; CHECK-SM70-EMPTY:
10418f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
10428f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b16 %rs1, [fma_f16_no_nans_multiple_uses_of_fma_param_0];
10438f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b16 %rs2, [fma_f16_no_nans_multiple_uses_of_fma_param_1];
10448f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b16 %rs3, [fma_f16_no_nans_multiple_uses_of_fma_param_2];
10458f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
10468f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b16 %rs5, 0x4700;
10478f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.rn.f16 %rs6, %rs4, %rs5;
10488f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.rn.f16 %rs7, %rs6, %rs4;
10498f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs7;
10508f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
  ; %2/%3 form the relu pattern; %4/%5 are the extra uses of the fma result %1.
10518f8016feSHugh Delaney  %1 = call nnan half @llvm.fma.f16(half %a, half %b, half %c)
10528f8016feSHugh Delaney  %2 = fcmp nsz ogt half %1, 0.0
10538f8016feSHugh Delaney  %3 = select nsz i1 %2, half %1, half 0.0
10548f8016feSHugh Delaney  %4 = fadd half %1, 7.0
10558f8016feSHugh Delaney  %5 = fadd half %4, %1
10568f8016feSHugh Delaney  ret half %5
10578f8016feSHugh Delaney}
10588f8016feSHugh Delaney
; fma_f16_maxnum_no_nans: scalar half ReLU written as llvm.maxnum.f16 against
; 0.0 (flagged `nsz`), applied to the result of an `nnan` llvm.fma.f16 call.
; Expected lowering per configuration:
;   * default sm_80 - one fused fma.rn.relu.f16.
;   * sm_80 with FTZ - fma.rn.ftz.relu.f16.
;   * sm_70          - fma.rn.f16, then the max is done in f32:
;                      cvt.f32.f16, max.f32 against 0, cvt.rn.f16.f32.
10598f8016feSHugh Delaneydefine half @fma_f16_maxnum_no_nans(half %a, half %b, half %c)  {
10608f8016feSHugh Delaney; CHECK-LABEL: fma_f16_maxnum_no_nans(
10618f8016feSHugh Delaney; CHECK:       {
10628f8016feSHugh Delaney; CHECK-NEXT:    .reg .b16 %rs<5>;
10638f8016feSHugh Delaney; CHECK-EMPTY:
10648f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
10658f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs1, [fma_f16_maxnum_no_nans_param_0];
10668f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs2, [fma_f16_maxnum_no_nans_param_1];
10678f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs3, [fma_f16_maxnum_no_nans_param_2];
10688f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.relu.f16 %rs4, %rs1, %rs2, %rs3;
10698f8016feSHugh Delaney; CHECK-NEXT:    st.param.b16 [func_retval0], %rs4;
10708f8016feSHugh Delaney; CHECK-NEXT:    ret;
10718f8016feSHugh Delaney;
10728f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_f16_maxnum_no_nans(
10738f8016feSHugh Delaney; CHECK-FTZ:       {
10748f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b16 %rs<5>;
10758f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
10768f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
10778f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_f16_maxnum_no_nans_param_0];
10788f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_f16_maxnum_no_nans_param_1];
10798f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_f16_maxnum_no_nans_param_2];
10808f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.ftz.relu.f16 %rs4, %rs1, %rs2, %rs3;
10818f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs4;
10828f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
10838f8016feSHugh Delaney;
10848f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_f16_maxnum_no_nans(
10858f8016feSHugh Delaney; CHECK-SM70:       {
10868f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b16 %rs<6>;
10878f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<3>;
10888f8016feSHugh Delaney; CHECK-SM70-EMPTY:
10898f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
10908f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b16 %rs1, [fma_f16_maxnum_no_nans_param_0];
10918f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b16 %rs2, [fma_f16_maxnum_no_nans_param_1];
10928f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b16 %rs3, [fma_f16_maxnum_no_nans_param_2];
10938f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
10948f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.f32.f16 %f1, %rs4;
10958f8016feSHugh Delaney; CHECK-SM70-NEXT:    max.f32 %f2, %f1, 0f00000000;
10968f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.rn.f16.f32 %rs5, %f2;
10978f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs5;
10988f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
  ; Input pattern: `nnan` fma feeding an `nsz` maxnum with constant 0.0.
10998f8016feSHugh Delaney  %1 = call nnan half @llvm.fma.f16(half %a, half %b, half %c)
11008f8016feSHugh Delaney  %2 = call nsz half @llvm.maxnum.f16(half %1, half 0.0)
11018f8016feSHugh Delaney  ret half %2
11028f8016feSHugh Delaney}
11038f8016feSHugh Delaney
; fma_bf16_no_nans: scalar bfloat ReLU written as `fcmp ogt` + `select`
; against 0.0, applied to the result of an `nnan` llvm.fma.bf16 call.
; Expected lowering per configuration:
;   * default sm_80 and sm_80 with FTZ - identical output, a single
;     fma.rn.relu.bf16 (no .ftz modifier appears on the bf16 instruction).
;   * sm_70 - no bf16 fma is used: each parameter is loaded as u16 and shifted
;     left by 16 to form an f32, fma.rn.f32 computes the product-sum, an
;     integer bfe/add/add-32767 sequence (with a setp.nan/or/selp path for NaN)
;     narrows the result, and setp.gt.f32 + selp.b16 implement the
;     compare/select.
11048f8016feSHugh Delaneydefine bfloat @fma_bf16_no_nans(bfloat %a, bfloat %b, bfloat %c)  {
11058f8016feSHugh Delaney; CHECK-LABEL: fma_bf16_no_nans(
11068f8016feSHugh Delaney; CHECK:       {
11078f8016feSHugh Delaney; CHECK-NEXT:    .reg .b16 %rs<5>;
11088f8016feSHugh Delaney; CHECK-EMPTY:
11098f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
11108f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs1, [fma_bf16_no_nans_param_0];
11118f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs2, [fma_bf16_no_nans_param_1];
11128f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs3, [fma_bf16_no_nans_param_2];
11138f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.relu.bf16 %rs4, %rs1, %rs2, %rs3;
11148f8016feSHugh Delaney; CHECK-NEXT:    st.param.b16 [func_retval0], %rs4;
11158f8016feSHugh Delaney; CHECK-NEXT:    ret;
11168f8016feSHugh Delaney;
11178f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_bf16_no_nans(
11188f8016feSHugh Delaney; CHECK-FTZ:       {
11198f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b16 %rs<5>;
11208f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
11218f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
11228f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_bf16_no_nans_param_0];
11238f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_bf16_no_nans_param_1];
11248f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_bf16_no_nans_param_2];
11258f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.relu.bf16 %rs4, %rs1, %rs2, %rs3;
11268f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs4;
11278f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
11288f8016feSHugh Delaney;
11298f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_bf16_no_nans(
11308f8016feSHugh Delaney; CHECK-SM70:       {
11318f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<3>;
1132310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b16 %rs<3>;
11338f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b32 %r<14>;
11348f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<6>;
11358f8016feSHugh Delaney; CHECK-SM70-EMPTY:
11368f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
11378f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.u16 %r1, [fma_bf16_no_nans_param_2];
11388f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r2, %r1, 16;
11398f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f1, %r2;
11408f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.u16 %r3, [fma_bf16_no_nans_param_1];
11418f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r4, %r3, 16;
11428f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f2, %r4;
11438f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.u16 %r5, [fma_bf16_no_nans_param_0];
11448f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r6, %r5, 16;
11458f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f3, %r6;
11468f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
11478f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r7, %f4;
11488f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r8, %r7, 16, 1;
11498f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r9, %r8, %r7;
11508f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r10, %r9, 32767;
11518f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
11528f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r11, %r7, 4194304;
11538f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r12, %r11, %r10, %p1;
11548f8016feSHugh Delaney; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r12; }
11558f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r13, %r12, -65536;
11568f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f5, %r13;
11578f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f32 %p2, %f5, 0f00000000;
1158310e7987SAlex MacLean; CHECK-SM70-NEXT:    selp.b16 %rs2, %rs1, 0x0000, %p2;
1159310e7987SAlex MacLean; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs2;
11608f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
  ; Input pattern: the fma carries `nnan`; the compare and select carry `nsz`.
11618f8016feSHugh Delaney  %1 = call nnan bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
11628f8016feSHugh Delaney  %2 = fcmp nsz ogt bfloat %1, 0.0
11638f8016feSHugh Delaney  %3 = select nsz i1 %2, bfloat %1, bfloat 0.0
11648f8016feSHugh Delaney  ret bfloat %3
11658f8016feSHugh Delaney}
11668f8016feSHugh Delaney
11678f8016feSHugh Delaney; FMA relu shouldn't be selected if the FMA operation has multiple uses
; bfloat variant: the fma result %1 is consumed by the compare/select pair and
; by both fadds. No configuration expects a relu form:
;   * default sm_80 - fma.rn.bf16, then each fadd is emitted as another
;     fma.rn.bf16 whose multiplier is 0x3F80 (bf16 1.0); 0x40E0 is bf16 7.0.
;   * sm_80 with FTZ - fma.rn.bf16, then the adds are done in f32 with
;     add.rn.ftz.f32 and narrowed back with cvt.rn.bf16.f32.
;   * sm_70 - everything is computed in f32 (fma.rn.f32, add.rn.f32) with the
;     integer bfe/add/selp narrowing sequence after each operation.
; NOTE(review): the select result %3 is not used by the returned value %5, and
; the expected output contains no compare/select/max at all. The relu pattern
; therefore probably never reaches instruction selection here - consider
; feeding %3 into the first fadd and regenerating the assertions so the
; multiple-use restriction is actually exercised. TODO confirm with llc.
11688f8016feSHugh Delaneydefine bfloat @fma_bf16_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloat %c)  {
11698f8016feSHugh Delaney; CHECK-LABEL: fma_bf16_no_nans_multiple_uses_of_fma(
11708f8016feSHugh Delaney; CHECK:       {
1171*5e5fd0e6Speterbell10; CHECK-NEXT:    .reg .b16 %rs<9>;
11728f8016feSHugh Delaney; CHECK-EMPTY:
11738f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
11748f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs1, [fma_bf16_no_nans_multiple_uses_of_fma_param_0];
11758f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs2, [fma_bf16_no_nans_multiple_uses_of_fma_param_1];
11768f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs3, [fma_bf16_no_nans_multiple_uses_of_fma_param_2];
11778f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.bf16 %rs4, %rs1, %rs2, %rs3;
1178*5e5fd0e6Speterbell10; CHECK-NEXT:    mov.b16 %rs5, 0x3F80;
1179*5e5fd0e6Speterbell10; CHECK-NEXT:    mov.b16 %rs6, 0x40E0;
1180*5e5fd0e6Speterbell10; CHECK-NEXT:    fma.rn.bf16 %rs7, %rs4, %rs5, %rs6;
1181*5e5fd0e6Speterbell10; CHECK-NEXT:    fma.rn.bf16 %rs8, %rs7, %rs5, %rs4;
1182*5e5fd0e6Speterbell10; CHECK-NEXT:    st.param.b16 [func_retval0], %rs8;
11838f8016feSHugh Delaney; CHECK-NEXT:    ret;
11848f8016feSHugh Delaney;
11858f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_bf16_no_nans_multiple_uses_of_fma(
11868f8016feSHugh Delaney; CHECK-FTZ:       {
1187310e7987SAlex MacLean; CHECK-FTZ-NEXT:    .reg .b16 %rs<7>;
11888f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<5>;
11898f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .f32 %f<5>;
11908f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
11918f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
11928f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_bf16_no_nans_multiple_uses_of_fma_param_0];
11938f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_bf16_no_nans_multiple_uses_of_fma_param_1];
11948f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_bf16_no_nans_multiple_uses_of_fma_param_2];
11958f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.bf16 %rs4, %rs1, %rs2, %rs3;
11968f8016feSHugh Delaney; CHECK-FTZ-NEXT:    cvt.u32.u16 %r1, %rs4;
11978f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r2, %r1, 16;
11988f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f1, %r2;
11998f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.rn.ftz.f32 %f2, %f1, 0f40E00000;
1200310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.rn.bf16.f32 %rs5, %f2;
1201310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.u32.u16 %r3, %rs5;
12028f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r4, %r3, 16;
12038f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f3, %r4;
12048f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.rn.ftz.f32 %f4, %f3, %f1;
1205310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.rn.bf16.f32 %rs6, %f4;
1206310e7987SAlex MacLean; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs6;
12078f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
12088f8016feSHugh Delaney;
12098f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_bf16_no_nans_multiple_uses_of_fma(
12108f8016feSHugh Delaney; CHECK-SM70:       {
12118f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<4>;
1212310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b16 %rs<2>;
12138f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b32 %r<27>;
12148f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<9>;
12158f8016feSHugh Delaney; CHECK-SM70-EMPTY:
12168f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
12178f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.u16 %r1, [fma_bf16_no_nans_multiple_uses_of_fma_param_2];
12188f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r2, %r1, 16;
12198f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f1, %r2;
12208f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.u16 %r3, [fma_bf16_no_nans_multiple_uses_of_fma_param_1];
12218f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r4, %r3, 16;
12228f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f2, %r4;
12238f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.u16 %r5, [fma_bf16_no_nans_multiple_uses_of_fma_param_0];
12248f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r6, %r5, 16;
12258f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f3, %r6;
12268f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
12278f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r7, %f4;
12288f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r8, %r7, 16, 1;
12298f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r9, %r8, %r7;
12308f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r10, %r9, 32767;
12318f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
12328f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r11, %r7, 4194304;
12338f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r12, %r11, %r10, %p1;
12348f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r13, %r12, -65536;
12358f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f5, %r13;
12368f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.rn.f32 %f6, %f5, 0f40E00000;
12378f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r14, %f6;
12388f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r15, %r14, 16, 1;
12398f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r16, %r15, %r14;
12408f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r17, %r16, 32767;
12418f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p2, %f6, %f6;
12428f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r18, %r14, 4194304;
12438f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r19, %r18, %r17, %p2;
12448f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r20, %r19, -65536;
12458f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f7, %r20;
12468f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.rn.f32 %f8, %f7, %f5;
12478f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r21, %f8;
12488f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r22, %r21, 16, 1;
12498f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r23, %r22, %r21;
12508f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r24, %r23, 32767;
12518f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p3, %f8, %f8;
12528f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r25, %r21, 4194304;
12538f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r26, %r25, %r24, %p3;
12548f8016feSHugh Delaney; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r26; }
12558f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs1;
12568f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
  ; %2/%3 form the relu pattern; %4/%5 are the extra uses of the fma result %1.
12578f8016feSHugh Delaney  %1 = call nnan bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
12588f8016feSHugh Delaney  %2 = fcmp nsz ogt bfloat %1, 0.0
12598f8016feSHugh Delaney  %3 = select nsz i1 %2, bfloat %1, bfloat 0.0
12608f8016feSHugh Delaney  %4 = fadd bfloat %1, 7.0
12618f8016feSHugh Delaney  %5 = fadd bfloat %4, %1
12628f8016feSHugh Delaney  ret bfloat %5
12638f8016feSHugh Delaney}
12648f8016feSHugh Delaney
; fma_bf16_maxnum_no_nans: scalar bfloat ReLU written as llvm.maxnum.bf16
; against 0.0 (flagged `nsz`), applied to an `nnan` llvm.fma.bf16 call.
; Expected lowering per configuration:
;   * default sm_80 and sm_80 with FTZ - identical output, a single
;     fma.rn.relu.bf16.
;   * sm_70 - computed in f32: parameters are widened by shl 16, fma.rn.f32,
;     integer narrowing sequence, max.f32 against 0, then the narrowing
;     sequence again before the upper 16 bits are stored as the result.
12658f8016feSHugh Delaneydefine bfloat @fma_bf16_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c)  {
12668f8016feSHugh Delaney; CHECK-LABEL: fma_bf16_maxnum_no_nans(
12678f8016feSHugh Delaney; CHECK:       {
12688f8016feSHugh Delaney; CHECK-NEXT:    .reg .b16 %rs<5>;
12698f8016feSHugh Delaney; CHECK-EMPTY:
12708f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
12718f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs1, [fma_bf16_maxnum_no_nans_param_0];
12728f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs2, [fma_bf16_maxnum_no_nans_param_1];
12738f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs3, [fma_bf16_maxnum_no_nans_param_2];
12748f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.relu.bf16 %rs4, %rs1, %rs2, %rs3;
12758f8016feSHugh Delaney; CHECK-NEXT:    st.param.b16 [func_retval0], %rs4;
12768f8016feSHugh Delaney; CHECK-NEXT:    ret;
12778f8016feSHugh Delaney;
12788f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_bf16_maxnum_no_nans(
12798f8016feSHugh Delaney; CHECK-FTZ:       {
12808f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b16 %rs<5>;
12818f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
12828f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
12838f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_bf16_maxnum_no_nans_param_0];
12848f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_bf16_maxnum_no_nans_param_1];
12858f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_bf16_maxnum_no_nans_param_2];
12868f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.relu.bf16 %rs4, %rs1, %rs2, %rs3;
12878f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs4;
12888f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
12898f8016feSHugh Delaney;
12908f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_bf16_maxnum_no_nans(
12918f8016feSHugh Delaney; CHECK-SM70:       {
12928f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<3>;
1293310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b16 %rs<2>;
12948f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b32 %r<20>;
12958f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<7>;
12968f8016feSHugh Delaney; CHECK-SM70-EMPTY:
12978f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
12988f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.u16 %r1, [fma_bf16_maxnum_no_nans_param_2];
12998f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r2, %r1, 16;
13008f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f1, %r2;
13018f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.u16 %r3, [fma_bf16_maxnum_no_nans_param_1];
13028f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r4, %r3, 16;
13038f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f2, %r4;
13048f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.u16 %r5, [fma_bf16_maxnum_no_nans_param_0];
13058f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r6, %r5, 16;
13068f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f3, %r6;
13078f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
13088f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r7, %f4;
13098f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r8, %r7, 16, 1;
13108f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r9, %r8, %r7;
13118f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r10, %r9, 32767;
13128f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
13138f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r11, %r7, 4194304;
13148f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r12, %r11, %r10, %p1;
13158f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r13, %r12, -65536;
13168f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f5, %r13;
13178f8016feSHugh Delaney; CHECK-SM70-NEXT:    max.f32 %f6, %f5, 0f00000000;
13188f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r14, %f6;
13198f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r15, %r14, 16, 1;
13208f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r16, %r15, %r14;
13218f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r17, %r16, 32767;
13228f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p2, %f6, %f6;
13238f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r18, %r14, 4194304;
13248f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r19, %r18, %r17, %p2;
13258f8016feSHugh Delaney; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r19; }
13268f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs1;
13278f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
  ; Input pattern: `nnan` fma feeding an `nsz` maxnum with constant 0.0.
13288f8016feSHugh Delaney  %1 = call nnan bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
13298f8016feSHugh Delaney  %2 = call nsz bfloat @llvm.maxnum.bf16(bfloat %1, bfloat 0.0)
13308f8016feSHugh Delaney  ret bfloat %2
13318f8016feSHugh Delaney}
13328f8016feSHugh Delaney
; fma_f16x2_no_nans: <2 x half> ReLU written as a vector `fcmp ogt` + `select`
; against zero, applied to the result of llvm.fma.f16x2.
; Expected lowering per configuration (none of them uses a relu form):
;   * default sm_80 - fma.rn.f16x2 followed by max.f16x2 against 0.
;   * sm_80 with FTZ - fma.rn.ftz.f16x2 followed by max.ftz.f16x2.
;   * sm_70 - fma.rn.f16x2, setp.gt.f16x2 producing one predicate per lane,
;     the packed value is split, each lane goes through selp.b16, and the
;     lanes are repacked.
; NOTE(review): unlike the scalar *_no_nans functions in this file, the fma
; call below carries no `nnan` flag even though the function name says
; "no_nans"; that is presumably why sm_80 keeps a separate max instead of
; fusing into a relu fma. Confirm whether the flag was omitted by accident
; (adding it requires regenerating the assertions).
13338f8016feSHugh Delaneydefine <2 x half> @fma_f16x2_no_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c)  {
13348f8016feSHugh Delaney; CHECK-LABEL: fma_f16x2_no_nans(
13358f8016feSHugh Delaney; CHECK:       {
13368f8016feSHugh Delaney; CHECK-NEXT:    .reg .b32 %r<7>;
13378f8016feSHugh Delaney; CHECK-EMPTY:
13388f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
13398f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_f16x2_no_nans_param_2];
13408f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_f16x2_no_nans_param_1];
13418f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_f16x2_no_nans_param_0];
13428f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.f16x2 %r4, %r3, %r2, %r1;
13438f8016feSHugh Delaney; CHECK-NEXT:    mov.b32 %r5, 0;
13448f8016feSHugh Delaney; CHECK-NEXT:    max.f16x2 %r6, %r4, %r5;
13458f8016feSHugh Delaney; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
13468f8016feSHugh Delaney; CHECK-NEXT:    ret;
13478f8016feSHugh Delaney;
13488f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_f16x2_no_nans(
13498f8016feSHugh Delaney; CHECK-FTZ:       {
13508f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<7>;
13518f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
13528f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
13538f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_f16x2_no_nans_param_2];
13548f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_f16x2_no_nans_param_1];
13558f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_f16x2_no_nans_param_0];
13568f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.ftz.f16x2 %r4, %r3, %r2, %r1;
13578f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %r5, 0;
13588f8016feSHugh Delaney; CHECK-FTZ-NEXT:    max.ftz.f16x2 %r6, %r4, %r5;
13598f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r6;
13608f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
13618f8016feSHugh Delaney;
13628f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_f16x2_no_nans(
13638f8016feSHugh Delaney; CHECK-SM70:       {
13648f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<3>;
13658f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b16 %rs<5>;
13668f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b32 %r<7>;
13678f8016feSHugh Delaney; CHECK-SM70-EMPTY:
13688f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
13698f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_f16x2_no_nans_param_2];
13708f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_f16x2_no_nans_param_1];
13718f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_f16x2_no_nans_param_0];
13728f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f16x2 %r4, %r3, %r2, %r1;
13738f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r5, 0;
13748f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f16x2 %p1|%p2, %r4, %r5;
13758f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r4;
13768f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b16 %rs3, %rs2, 0x0000, %p2;
13778f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b16 %rs4, %rs1, 0x0000, %p1;
13788f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r6, {%rs4, %rs3};
13798f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r6;
13808f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
  ; Input pattern: unflagged vector fma; only the compare and select carry `nsz`.
13818f8016feSHugh Delaney  %1 = call <2 x half> @llvm.fma.f16x2(<2 x half> %a, <2 x half> %b, <2 x half> %c)
13828f8016feSHugh Delaney  %2 = fcmp nsz ogt <2 x half> %1, <half 0.0, half 0.0>
13838f8016feSHugh Delaney  %3 = select nsz <2 x i1> %2, <2 x half> %1, <2 x half> <half 0.0, half 0.0>
13848f8016feSHugh Delaney  ret <2 x half> %3
13858f8016feSHugh Delaney}
13868f8016feSHugh Delaney
13878f8016feSHugh Delaney; FMA relu shouldn't be selected if the FMA operation has multiple uses
; <2 x half> variant. The IR computes an nnan fma, then a compare/select
; against zero, but returns fadd(fadd(%1, 7.0), %1). All three check prefixes
; (CHECK = sm_80, CHECK-FTZ = sm_80 with preserve-sign denormals,
; CHECK-SM70 = sm_70) expect a plain fma without .relu followed by two packed
; adds; the FTZ run only differs by the .ftz modifier.
; NOTE(review): %2/%3 (the fcmp/select pair) never feed the returned value, so
; the relu pattern looks dead in this function -- confirm the test really
; exercises the multiple-use restriction and not just dead-code removal.
13888f8016feSHugh Delaneydefine <2 x half> @fma_f16x2_no_nans_multiple_uses_of_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c)  {
13898f8016feSHugh Delaney; CHECK-LABEL: fma_f16x2_no_nans_multiple_uses_of_fma(
13908f8016feSHugh Delaney; CHECK:       {
13918f8016feSHugh Delaney; CHECK-NEXT:    .reg .b32 %r<8>;
13928f8016feSHugh Delaney; CHECK-EMPTY:
13938f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
13948f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_f16x2_no_nans_multiple_uses_of_fma_param_2];
13958f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_f16x2_no_nans_multiple_uses_of_fma_param_1];
13968f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_f16x2_no_nans_multiple_uses_of_fma_param_0];
13978f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.f16x2 %r4, %r3, %r2, %r1;
13988f8016feSHugh Delaney; CHECK-NEXT:    mov.b32 %r5, 1191200512;
13998f8016feSHugh Delaney; CHECK-NEXT:    add.rn.f16x2 %r6, %r4, %r5;
14008f8016feSHugh Delaney; CHECK-NEXT:    add.rn.f16x2 %r7, %r6, %r4;
14018f8016feSHugh Delaney; CHECK-NEXT:    st.param.b32 [func_retval0], %r7;
14028f8016feSHugh Delaney; CHECK-NEXT:    ret;
14038f8016feSHugh Delaney;
14048f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_f16x2_no_nans_multiple_uses_of_fma(
14058f8016feSHugh Delaney; CHECK-FTZ:       {
14068f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<8>;
14078f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
14088f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
14098f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_f16x2_no_nans_multiple_uses_of_fma_param_2];
14108f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_f16x2_no_nans_multiple_uses_of_fma_param_1];
14118f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_f16x2_no_nans_multiple_uses_of_fma_param_0];
14128f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.ftz.f16x2 %r4, %r3, %r2, %r1;
14138f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %r5, 1191200512;
14148f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.rn.ftz.f16x2 %r6, %r4, %r5;
14158f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.rn.ftz.f16x2 %r7, %r6, %r4;
14168f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r7;
14178f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
14188f8016feSHugh Delaney;
14198f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_f16x2_no_nans_multiple_uses_of_fma(
14208f8016feSHugh Delaney; CHECK-SM70:       {
14218f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b32 %r<8>;
14228f8016feSHugh Delaney; CHECK-SM70-EMPTY:
14238f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
14248f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_f16x2_no_nans_multiple_uses_of_fma_param_2];
14258f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_f16x2_no_nans_multiple_uses_of_fma_param_1];
14268f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_f16x2_no_nans_multiple_uses_of_fma_param_0];
14278f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f16x2 %r4, %r3, %r2, %r1;
14288f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r5, 1191200512;
14298f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.rn.f16x2 %r6, %r4, %r5;
14308f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.rn.f16x2 %r7, %r6, %r4;
14318f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r7;
14328f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
14338f8016feSHugh Delaney  %1 = call nnan <2 x half> @llvm.fma.f16x2(<2 x half> %a, <2 x half> %b, <2 x half> %c)
14348f8016feSHugh Delaney  %2 = fcmp nsz ogt <2 x half> %1, <half 0.0, half 0.0>
14358f8016feSHugh Delaney  %3 = select nsz <2 x i1> %2, <2 x half> %1, <2 x half> <half 0.0, half 0.0>
14368f8016feSHugh Delaney  %4 = fadd <2 x half> %1, <half 7.0, half 7.0>
14378f8016feSHugh Delaney  %5 = fadd <2 x half> %4, %1
14388f8016feSHugh Delaney  ret <2 x half> %5
14398f8016feSHugh Delaney}
14408f8016feSHugh Delaney
; nnan fma on <2 x half> whose only user is an nsz llvm.maxnum against zero.
; Expected output: CHECK (sm_80) folds the pair into fma.rn.relu.f16x2 and
; CHECK-FTZ into fma.rn.ftz.relu.f16x2. CHECK-SM70 keeps fma.rn.f16x2, unpacks
; the two lanes, and applies max.f32 against zero to each lane through f32
; conversions before repacking.
14418f8016feSHugh Delaneydefine <2 x half> @fma_f16x2_maxnum_no_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c)  {
14428f8016feSHugh Delaney; CHECK-LABEL: fma_f16x2_maxnum_no_nans(
14438f8016feSHugh Delaney; CHECK:       {
14448f8016feSHugh Delaney; CHECK-NEXT:    .reg .b32 %r<5>;
14458f8016feSHugh Delaney; CHECK-EMPTY:
14468f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
14478f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_f16x2_maxnum_no_nans_param_2];
14488f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_f16x2_maxnum_no_nans_param_1];
14498f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_f16x2_maxnum_no_nans_param_0];
14508f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.relu.f16x2 %r4, %r3, %r2, %r1;
14518f8016feSHugh Delaney; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
14528f8016feSHugh Delaney; CHECK-NEXT:    ret;
14538f8016feSHugh Delaney;
14548f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_f16x2_maxnum_no_nans(
14558f8016feSHugh Delaney; CHECK-FTZ:       {
14568f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<5>;
14578f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
14588f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
14598f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_f16x2_maxnum_no_nans_param_2];
14608f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_f16x2_maxnum_no_nans_param_1];
14618f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_f16x2_maxnum_no_nans_param_0];
14628f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.ftz.relu.f16x2 %r4, %r3, %r2, %r1;
14638f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r4;
14648f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
14658f8016feSHugh Delaney;
14668f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_f16x2_maxnum_no_nans(
14678f8016feSHugh Delaney; CHECK-SM70:       {
14688f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b16 %rs<5>;
14698f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b32 %r<6>;
14708f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<5>;
14718f8016feSHugh Delaney; CHECK-SM70-EMPTY:
14728f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
14738f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_f16x2_maxnum_no_nans_param_2];
14748f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_f16x2_maxnum_no_nans_param_1];
14758f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_f16x2_maxnum_no_nans_param_0];
14768f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f16x2 %r4, %r3, %r2, %r1;
14778f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r4;
14788f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.f32.f16 %f1, %rs2;
14798f8016feSHugh Delaney; CHECK-SM70-NEXT:    max.f32 %f2, %f1, 0f00000000;
14808f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.rn.f16.f32 %rs3, %f2;
14818f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.f32.f16 %f3, %rs1;
14828f8016feSHugh Delaney; CHECK-SM70-NEXT:    max.f32 %f4, %f3, 0f00000000;
14838f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.rn.f16.f32 %rs4, %f4;
14848f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r5, {%rs4, %rs3};
14858f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r5;
14868f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
14878f8016feSHugh Delaney  %1 = call nnan <2 x half> @llvm.fma.f16x2(<2 x half> %a, <2 x half> %b, <2 x half> %c)
14888f8016feSHugh Delaney  %2 = call nsz <2 x half> @llvm.maxnum.f16x2(<2 x half> %1, <2 x half> <half 0.0, half 0.0>)
14898f8016feSHugh Delaney  ret <2 x half> %2
14908f8016feSHugh Delaney}
14918f8016feSHugh Delaney
; nnan fma on <2 x bfloat> followed by an nsz "x ogt 0 ? x : 0" compare/select.
; Expected output: CHECK and CHECK-FTZ both get fma.rn.relu.bf16x2 (the FTZ
; run expects no .ftz modifier for bf16). CHECK-SM70 contains no packed bf16
; fma: each lane is shifted left by 16 into an f32, combined with fma.rn.f32,
; narrowed back with an integer bfe/add/setp.nan/selp sequence, and finally
; clamped with setp.gt.f32 + selp.b16 against zero.
14928f8016feSHugh Delaneydefine <2 x bfloat> @fma_bf16x2_no_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c)  {
14938f8016feSHugh Delaney; CHECK-LABEL: fma_bf16x2_no_nans(
14948f8016feSHugh Delaney; CHECK:       {
14958f8016feSHugh Delaney; CHECK-NEXT:    .reg .b32 %r<5>;
14968f8016feSHugh Delaney; CHECK-EMPTY:
14978f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
14988f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_bf16x2_no_nans_param_2];
14998f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_bf16x2_no_nans_param_1];
15008f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_bf16x2_no_nans_param_0];
15018f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.relu.bf16x2 %r4, %r3, %r2, %r1;
15028f8016feSHugh Delaney; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
15038f8016feSHugh Delaney; CHECK-NEXT:    ret;
15048f8016feSHugh Delaney;
15058f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_bf16x2_no_nans(
15068f8016feSHugh Delaney; CHECK-FTZ:       {
15078f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<5>;
15088f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
15098f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
15108f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_bf16x2_no_nans_param_2];
15118f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_bf16x2_no_nans_param_1];
15128f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_bf16x2_no_nans_param_0];
15138f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.relu.bf16x2 %r4, %r3, %r2, %r1;
15148f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r4;
15158f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
15168f8016feSHugh Delaney;
15178f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_bf16x2_no_nans(
15188f8016feSHugh Delaney; CHECK-SM70:       {
15198f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<5>;
1520310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b16 %rs<11>;
15218f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b32 %r<31>;
15228f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<11>;
15238f8016feSHugh Delaney; CHECK-SM70-EMPTY:
15248f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
15258f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_bf16x2_no_nans_param_0];
15268f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_bf16x2_no_nans_param_1];
15278f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_bf16x2_no_nans_param_2];
15288f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r3;
15298f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r4, %rs1;
15308f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r5, %r4, 16;
15318f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f1, %r5;
1532310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs3, %rs4}, %r2;
1533310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r6, %rs3;
15348f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r7, %r6, 16;
15358f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f2, %r7;
1536310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs5, %rs6}, %r1;
1537310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r8, %rs5;
15388f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r9, %r8, 16;
15398f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f3, %r9;
15408f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
15418f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r10, %f4;
15428f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r11, %r10, 16, 1;
15438f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r12, %r11, %r10;
15448f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r13, %r12, 32767;
15458f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
15468f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r14, %r10, 4194304;
15478f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r15, %r14, %r13, %p1;
1548310e7987SAlex MacLean; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r15; }
15498f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r16, %rs2;
15508f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r17, %r16, 16;
15518f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f5, %r17;
1552310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r18, %rs4;
15538f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r19, %r18, 16;
15548f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f6, %r19;
1555310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r20, %rs6;
15568f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r21, %r20, 16;
15578f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f7, %r21;
15588f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f8, %f7, %f6, %f5;
15598f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r22, %f8;
15608f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r23, %r22, 16, 1;
15618f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r24, %r23, %r22;
15628f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r25, %r24, 32767;
15638f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p2, %f8, %f8;
15648f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r26, %r22, 4194304;
15658f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r27, %r26, %r25, %p2;
1566310e7987SAlex MacLean; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs8}, %r27; }
15678f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r28, %r15, -65536;
15688f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f9, %r28;
15698f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f32 %p3, %f9, 0f00000000;
15708f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r29, %r27, -65536;
15718f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f10, %r29;
15728f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f32 %p4, %f10, 0f00000000;
1573310e7987SAlex MacLean; CHECK-SM70-NEXT:    selp.b16 %rs9, %rs8, 0x0000, %p4;
1574310e7987SAlex MacLean; CHECK-SM70-NEXT:    selp.b16 %rs10, %rs7, 0x0000, %p3;
1575310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 %r30, {%rs10, %rs9};
15768f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r30;
15778f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
15788f8016feSHugh Delaney  %1 = call nnan <2 x bfloat> @llvm.fma.bf16x2(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c)
15798f8016feSHugh Delaney  %2 = fcmp nsz ogt <2 x bfloat> %1, <bfloat 0.0, bfloat 0.0>
15808f8016feSHugh Delaney  %3 = select nsz <2 x i1> %2, <2 x bfloat> %1, <2 x bfloat> <bfloat 0.0, bfloat 0.0>
15818f8016feSHugh Delaney  ret <2 x bfloat> %3
15828f8016feSHugh Delaney}
15838f8016feSHugh Delaney
15848f8016feSHugh Delaney; FMA relu shouldn't be selected if the FMA operation has multiple uses
; <2 x bfloat> variant. The IR computes an nnan fma, then a compare/select
; against zero, but returns fadd(fadd(%1, 7.0), %1).
; Expected output: CHECK (sm_80) keeps a non-relu fma.rn.bf16x2 and expresses
; both fadds as fma.rn.bf16x2 with the multiplier constant 1065369472
; (0x3F803F80, i.e. bf16 1.0 in both lanes). CHECK-FTZ keeps the packed fma but
; performs the adds per lane with add.rn.ftz.f32. CHECK-SM70 performs the fma
; and both adds in f32 per lane and repacks with prmt.b32.
; NOTE(review): %2/%3 (the fcmp/select pair) never feed the returned value, so
; the relu pattern looks dead in this function -- confirm the test really
; exercises the multiple-use restriction and not just dead-code removal.
15858f8016feSHugh Delaneydefine <2 x bfloat> @fma_bf16x2_no_nans_multiple_uses_of_fma(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c)  {
15868f8016feSHugh Delaney; CHECK-LABEL: fma_bf16x2_no_nans_multiple_uses_of_fma(
15878f8016feSHugh Delaney; CHECK:       {
1588*5e5fd0e6Speterbell10; CHECK-NEXT:    .reg .b32 %r<9>;
15898f8016feSHugh Delaney; CHECK-EMPTY:
15908f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
15918f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_bf16x2_no_nans_multiple_uses_of_fma_param_2];
15928f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_bf16x2_no_nans_multiple_uses_of_fma_param_1];
15938f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_bf16x2_no_nans_multiple_uses_of_fma_param_0];
15948f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.bf16x2 %r4, %r3, %r2, %r1;
1595*5e5fd0e6Speterbell10; CHECK-NEXT:    mov.b32 %r5, 1065369472;
1596*5e5fd0e6Speterbell10; CHECK-NEXT:    mov.b32 %r6, 1088438496;
1597*5e5fd0e6Speterbell10; CHECK-NEXT:    fma.rn.bf16x2 %r7, %r4, %r5, %r6;
1598*5e5fd0e6Speterbell10; CHECK-NEXT:    fma.rn.bf16x2 %r8, %r7, %r5, %r4;
1599*5e5fd0e6Speterbell10; CHECK-NEXT:    st.param.b32 [func_retval0], %r8;
16008f8016feSHugh Delaney; CHECK-NEXT:    ret;
16018f8016feSHugh Delaney;
16028f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_bf16x2_no_nans_multiple_uses_of_fma(
16038f8016feSHugh Delaney; CHECK-FTZ:       {
1604310e7987SAlex MacLean; CHECK-FTZ-NEXT:    .reg .b16 %rs<5>;
16058f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<14>;
16068f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .f32 %f<9>;
16078f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
16088f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
16098f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_bf16x2_no_nans_multiple_uses_of_fma_param_2];
16108f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_bf16x2_no_nans_multiple_uses_of_fma_param_1];
16118f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_bf16x2_no_nans_multiple_uses_of_fma_param_0];
16128f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.bf16x2 %r4, %r3, %r2, %r1;
16138f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 {%rs1, %rs2}, %r4;
16144b24ab4bSAlex MacLean; CHECK-FTZ-NEXT:    cvt.u32.u16 %r5, %rs2;
16158f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r6, %r5, 16;
16168f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f1, %r6;
16178f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.rn.ftz.f32 %f2, %f1, 0f40E00000;
1618310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.rn.bf16.f32 %rs3, %f2;
16194b24ab4bSAlex MacLean; CHECK-FTZ-NEXT:    cvt.u32.u16 %r7, %rs1;
16208f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r8, %r7, 16;
16218f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f3, %r8;
16228f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.rn.ftz.f32 %f4, %f3, 0f40E00000;
1623310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.rn.bf16.f32 %rs4, %f4;
1624310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.u32.u16 %r9, %rs4;
16258f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r10, %r9, 16;
16268f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f5, %r10;
16278f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.rn.ftz.f32 %f6, %f5, %f3;
1628310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.u32.u16 %r11, %rs3;
16298f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r12, %r11, 16;
16308f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f7, %r12;
16318f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.rn.ftz.f32 %f8, %f7, %f1;
16324b24ab4bSAlex MacLean; CHECK-FTZ-NEXT:    cvt.rn.bf16x2.f32 %r13, %f8, %f6;
16338f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r13;
16348f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
16358f8016feSHugh Delaney;
16368f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_bf16x2_no_nans_multiple_uses_of_fma(
16378f8016feSHugh Delaney; CHECK-SM70:       {
16388f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<7>;
1639310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b16 %rs<7>;
1640310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b32 %r<57>;
16418f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<17>;
16428f8016feSHugh Delaney; CHECK-SM70-EMPTY:
16438f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
16448f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_bf16x2_no_nans_multiple_uses_of_fma_param_0];
16458f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_bf16x2_no_nans_multiple_uses_of_fma_param_1];
16468f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_bf16x2_no_nans_multiple_uses_of_fma_param_2];
16478f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r3;
16488f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r4, %rs2;
16498f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r5, %r4, 16;
16508f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f1, %r5;
1651310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs3, %rs4}, %r2;
1652310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r6, %rs4;
16538f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r7, %r6, 16;
16548f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f2, %r7;
1655310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs5, %rs6}, %r1;
1656310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r8, %rs6;
16578f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r9, %r8, 16;
16588f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f3, %r9;
16598f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
16608f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r10, %f4;
16618f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r11, %r10, 16, 1;
16628f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r12, %r11, %r10;
16638f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r13, %r12, 32767;
16648f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
16658f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r14, %r10, 4194304;
16668f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r15, %r14, %r13, %p1;
16678f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r16, %rs1;
16688f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r17, %r16, 16;
16698f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f5, %r17;
1670310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r18, %rs3;
16718f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r19, %r18, 16;
16728f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f6, %r19;
1673310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r20, %rs5;
16748f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r21, %r20, 16;
16758f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f7, %r21;
16768f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f8, %f7, %f6, %f5;
16778f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r22, %f8;
16788f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r23, %r22, 16, 1;
16798f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r24, %r23, %r22;
16808f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r25, %r24, 32767;
16818f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p2, %f8, %f8;
16828f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r26, %r22, 4194304;
16838f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r27, %r26, %r25, %p2;
16848f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r28, %r27, -65536;
16858f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f9, %r28;
16868f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.rn.f32 %f10, %f9, 0f40E00000;
16878f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r29, %f10;
16888f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r30, %r29, 16, 1;
16898f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r31, %r30, %r29;
16908f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r32, %r31, 32767;
16918f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p3, %f10, %f10;
16928f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r33, %r29, 4194304;
16938f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r34, %r33, %r32, %p3;
16948f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r35, %r15, -65536;
16958f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f11, %r35;
16968f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.rn.f32 %f12, %f11, 0f40E00000;
16978f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r36, %f12;
16988f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r37, %r36, 16, 1;
16998f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r38, %r37, %r36;
17008f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r39, %r38, 32767;
17018f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p4, %f12, %f12;
17028f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r40, %r36, 4194304;
17038f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r41, %r40, %r39, %p4;
17048f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r42, %r41, -65536;
17058f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f13, %r42;
17068f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.rn.f32 %f14, %f13, %f11;
17078f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r43, %f14;
17088f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r44, %r43, 16, 1;
17098f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r45, %r44, %r43;
17108f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r46, %r45, 32767;
17118f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p5, %f14, %f14;
17128f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r47, %r43, 4194304;
17138f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r48, %r47, %r46, %p5;
17148f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r49, %r34, -65536;
17158f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f15, %r49;
17168f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.rn.f32 %f16, %f15, %f9;
17178f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r50, %f16;
17188f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r51, %r50, 16, 1;
17198f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r52, %r51, %r50;
17208f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r53, %r52, 32767;
17218f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p6, %f16, %f16;
17228f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r54, %r50, 4194304;
17238f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r55, %r54, %r53, %p6;
1724a1f5fe8cSFraser Cormack; CHECK-SM70-NEXT:    prmt.b32 %r56, %r55, %r48, 0x7632U;
17258f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r56;
17268f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
17278f8016feSHugh Delaney  %1 = call nnan <2 x bfloat> @llvm.fma.bf16x2(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c)
17288f8016feSHugh Delaney  %2 = fcmp nsz ogt <2 x bfloat> %1, <bfloat 0.0, bfloat 0.0>
17298f8016feSHugh Delaney  %3 = select nsz <2 x i1> %2, <2 x bfloat> %1, <2 x bfloat> <bfloat 0.0, bfloat 0.0>
17308f8016feSHugh Delaney  %4 = fadd <2 x bfloat> %1, <bfloat 7.0, bfloat 7.0>
17318f8016feSHugh Delaney  %5 = fadd <2 x bfloat> %4, %1
17328f8016feSHugh Delaney  ret <2 x bfloat> %5
17338f8016feSHugh Delaney}
17348f8016feSHugh Delaney
; nnan fma on <2 x bfloat> whose only user is an nsz llvm.maxnum against zero.
; Expected output: CHECK and CHECK-FTZ both get fma.rn.relu.bf16x2 (the FTZ
; run expects no .ftz modifier for bf16). CHECK-SM70 expands both the fma and
; the max to f32 per lane (fma.rn.f32, then max.f32 against zero), narrowing
; after each step with an integer bfe/add/setp.nan/selp sequence, and repacks
; the two lanes with prmt.b32.
17358f8016feSHugh Delaneydefine <2 x bfloat> @fma_bf16x2_maxnum_no_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c)  {
17368f8016feSHugh Delaney; CHECK-LABEL: fma_bf16x2_maxnum_no_nans(
17378f8016feSHugh Delaney; CHECK:       {
17388f8016feSHugh Delaney; CHECK-NEXT:    .reg .b32 %r<5>;
17398f8016feSHugh Delaney; CHECK-EMPTY:
17408f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
17418f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_bf16x2_maxnum_no_nans_param_2];
17428f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_bf16x2_maxnum_no_nans_param_1];
17438f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_bf16x2_maxnum_no_nans_param_0];
17448f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.relu.bf16x2 %r4, %r3, %r2, %r1;
17458f8016feSHugh Delaney; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
17468f8016feSHugh Delaney; CHECK-NEXT:    ret;
17478f8016feSHugh Delaney;
17488f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_bf16x2_maxnum_no_nans(
17498f8016feSHugh Delaney; CHECK-FTZ:       {
17508f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<5>;
17518f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
17528f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
17538f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_bf16x2_maxnum_no_nans_param_2];
17548f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_bf16x2_maxnum_no_nans_param_1];
17558f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_bf16x2_maxnum_no_nans_param_0];
17568f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.relu.bf16x2 %r4, %r3, %r2, %r1;
17578f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r4;
17588f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
17598f8016feSHugh Delaney;
17608f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_bf16x2_maxnum_no_nans(
17618f8016feSHugh Delaney; CHECK-SM70:       {
17628f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<5>;
1763310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b16 %rs<7>;
1764310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b32 %r<43>;
17658f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<13>;
17668f8016feSHugh Delaney; CHECK-SM70-EMPTY:
17678f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
17688f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_bf16x2_maxnum_no_nans_param_0];
17698f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_bf16x2_maxnum_no_nans_param_1];
17708f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_bf16x2_maxnum_no_nans_param_2];
17718f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r3;
17728f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r4, %rs1;
17738f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r5, %r4, 16;
17748f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f1, %r5;
1775310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs3, %rs4}, %r2;
1776310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r6, %rs3;
17778f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r7, %r6, 16;
17788f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f2, %r7;
1779310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs5, %rs6}, %r1;
1780310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r8, %rs5;
17818f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r9, %r8, 16;
17828f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f3, %r9;
17838f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
17848f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r10, %f4;
17858f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r11, %r10, 16, 1;
17868f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r12, %r11, %r10;
17878f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r13, %r12, 32767;
17888f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
17898f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r14, %r10, 4194304;
17908f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r15, %r14, %r13, %p1;
17918f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r16, %rs2;
17928f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r17, %r16, 16;
17938f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f5, %r17;
1794310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r18, %rs4;
17958f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r19, %r18, 16;
17968f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f6, %r19;
1797310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r20, %rs6;
17988f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r21, %r20, 16;
17998f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f7, %r21;
18008f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f8, %f7, %f6, %f5;
18018f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r22, %f8;
18028f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r23, %r22, 16, 1;
18038f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r24, %r23, %r22;
18048f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r25, %r24, 32767;
18058f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p2, %f8, %f8;
18068f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r26, %r22, 4194304;
18078f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r27, %r26, %r25, %p2;
18088f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r28, %r27, -65536;
18098f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f9, %r28;
18108f8016feSHugh Delaney; CHECK-SM70-NEXT:    max.f32 %f10, %f9, 0f00000000;
18118f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r29, %f10;
18128f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r30, %r29, 16, 1;
18138f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r31, %r30, %r29;
18148f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r32, %r31, 32767;
18158f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p3, %f10, %f10;
18168f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r33, %r29, 4194304;
18178f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r34, %r33, %r32, %p3;
18188f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r35, %r15, -65536;
18198f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f11, %r35;
18208f8016feSHugh Delaney; CHECK-SM70-NEXT:    max.f32 %f12, %f11, 0f00000000;
18218f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r36, %f12;
18228f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r37, %r36, 16, 1;
18238f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r38, %r37, %r36;
18248f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r39, %r38, 32767;
18258f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p4, %f12, %f12;
18268f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r40, %r36, 4194304;
18278f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r41, %r40, %r39, %p4;
1828a1f5fe8cSFraser Cormack; CHECK-SM70-NEXT:    prmt.b32 %r42, %r41, %r34, 0x7632U;
18298f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r42;
18308f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
18318f8016feSHugh Delaney  %1 = call nnan <2 x bfloat> @llvm.fma.bf16x2(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c)
18328f8016feSHugh Delaney  %2 = call nsz <2 x bfloat> @llvm.maxnum.bf16x2(<2 x bfloat> %1, <2 x bfloat> <bfloat 0.0, bfloat 0.0>)
18338f8016feSHugh Delaney  ret <2 x bfloat> %2
18348f8016feSHugh Delaney}
1835