xref: /llvm-project/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll (revision 5e5fd0e6fc50cc1198750308c11433a5b3acfd0f)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}

; Using FTZ should emit fma.ftz.relu for f16, not for bf16
; RUN: llc < %s -denormal-fp-math-f32=preserve-sign -mtriple=nvptx64 -mcpu=sm_80 | FileCheck %s --check-prefixes=CHECK-FTZ
; RUN: %if ptxas %{ llc < %s -denormal-fp-math-f32=preserve-sign -mtriple=nvptx64 -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}

; SM < 80 (or PTX version < 70) should not emit fma{.ftz}.relu
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 | FileCheck %s --check-prefixes=CHECK-SM70
118f8016feSHugh Delaney
; Unfused fmul + fadd on half whose result is clamped at zero with
; `fcmp ogt` + select. Expected lowering per the assertions below: a single
; fma.rn.relu.f16 on sm_80 (fma.rn.ftz.relu.f16 in the FTZ run), and on sm_70
; a plain fma.rn.f16 followed by a setp.gt/selp pair.
; Attribute group #0 is defined outside this view (presumably the no-NaNs
; variant, going by the function name -- confirm against the attribute list).
define half @fma_f16_expanded_no_nans(half %a, half %b, half %c) #0 {
; CHECK-LABEL: fma_f16_expanded_no_nans(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_no_nans_param_0];
; CHECK-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_no_nans_param_1];
; CHECK-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_no_nans_param_2];
; CHECK-NEXT:    fma.rn.relu.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs4;
; CHECK-NEXT:    ret;
;
; CHECK-FTZ-LABEL: fma_f16_expanded_no_nans(
; CHECK-FTZ:       {
; CHECK-FTZ-NEXT:    .reg .b16 %rs<5>;
; CHECK-FTZ-EMPTY:
; CHECK-FTZ-NEXT:  // %bb.0:
; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_no_nans_param_0];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_no_nans_param_1];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_no_nans_param_2];
; CHECK-FTZ-NEXT:    fma.rn.ftz.relu.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs4;
; CHECK-FTZ-NEXT:    ret;
;
; CHECK-SM70-LABEL: fma_f16_expanded_no_nans(
; CHECK-SM70:       {
; CHECK-SM70-NEXT:    .reg .pred %p<2>;
; CHECK-SM70-NEXT:    .reg .b16 %rs<7>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT:  // %bb.0:
; CHECK-SM70-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_no_nans_param_0];
; CHECK-SM70-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_no_nans_param_1];
; CHECK-SM70-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_no_nans_param_2];
; CHECK-SM70-NEXT:    fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-SM70-NEXT:    mov.b16 %rs5, 0x0000;
; CHECK-SM70-NEXT:    setp.gt.f16 %p1, %rs4, %rs5;
; CHECK-SM70-NEXT:    selp.b16 %rs6, %rs4, 0x0000, %p1;
; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs6;
; CHECK-SM70-NEXT:    ret;
  %1 = fmul half %a, %b
  %2 = fadd half %1, %c
  %3 = fcmp ogt half %2, 0.0
  %4 = select i1 %3, half %2, half 0.0
  ret half %4
}
588f8016feSHugh Delaney
; FMA relu shouldn't be selected if the FMA operation has multiple uses.
; Here %2 feeds both the zero clamp and a separate `fadd ..., 7.0`, so every
; run expects a plain fma followed by a separate clamp (max on sm_80,
; setp.gt/selp on sm_70) instead of a fused relu form.
; Attribute group #0 is defined outside this view.
define half @fma_f16_expanded_no_nans_multiple_uses_of_fma(half %a, half %b, half %c) #0 {
; CHECK-LABEL: fma_f16_expanded_no_nans_multiple_uses_of_fma(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<10>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_0];
; CHECK-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_1];
; CHECK-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_2];
; CHECK-NEXT:    fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-NEXT:    mov.b16 %rs5, 0x0000;
; CHECK-NEXT:    max.f16 %rs6, %rs4, %rs5;
; CHECK-NEXT:    mov.b16 %rs7, 0x4700;
; CHECK-NEXT:    add.f16 %rs8, %rs4, %rs7;
; CHECK-NEXT:    add.f16 %rs9, %rs6, %rs8;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs9;
; CHECK-NEXT:    ret;
;
; CHECK-FTZ-LABEL: fma_f16_expanded_no_nans_multiple_uses_of_fma(
; CHECK-FTZ:       {
; CHECK-FTZ-NEXT:    .reg .b16 %rs<10>;
; CHECK-FTZ-EMPTY:
; CHECK-FTZ-NEXT:  // %bb.0:
; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_0];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_1];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_2];
; CHECK-FTZ-NEXT:    fma.rn.ftz.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-FTZ-NEXT:    mov.b16 %rs5, 0x0000;
; CHECK-FTZ-NEXT:    max.ftz.f16 %rs6, %rs4, %rs5;
; CHECK-FTZ-NEXT:    mov.b16 %rs7, 0x4700;
; CHECK-FTZ-NEXT:    add.ftz.f16 %rs8, %rs4, %rs7;
; CHECK-FTZ-NEXT:    add.ftz.f16 %rs9, %rs6, %rs8;
; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs9;
; CHECK-FTZ-NEXT:    ret;
;
; CHECK-SM70-LABEL: fma_f16_expanded_no_nans_multiple_uses_of_fma(
; CHECK-SM70:       {
; CHECK-SM70-NEXT:    .reg .pred %p<2>;
; CHECK-SM70-NEXT:    .reg .b16 %rs<10>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT:  // %bb.0:
; CHECK-SM70-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_0];
; CHECK-SM70-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_1];
; CHECK-SM70-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_2];
; CHECK-SM70-NEXT:    fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-SM70-NEXT:    mov.b16 %rs5, 0x0000;
; CHECK-SM70-NEXT:    setp.gt.f16 %p1, %rs4, %rs5;
; CHECK-SM70-NEXT:    selp.b16 %rs6, %rs4, 0x0000, %p1;
; CHECK-SM70-NEXT:    mov.b16 %rs7, 0x4700;
; CHECK-SM70-NEXT:    add.f16 %rs8, %rs4, %rs7;
; CHECK-SM70-NEXT:    add.f16 %rs9, %rs6, %rs8;
; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs9;
; CHECK-SM70-NEXT:    ret;
  %1 = fmul half %a, %b
  %2 = fadd half %1, %c
  %3 = fcmp ogt half %2, 0.0
  %4 = select i1 %3, half %2, half 0.0
  %5 = fadd half %2, 7.0
  %6 = fadd half %4, %5
  ret half %6
}
1218f8016feSHugh Delaney
; Same fmul + fadd + `fcmp ogt`/select body as the no-NaNs half test, but
; carrying attribute group #1 instead of #0. No run expects a relu form here:
; sm_80 expects fma.rn.f16 (fma.rn.ftz.f16 in the FTZ run) followed by a
; separate max, and sm_70 expects fma.rn.f16 followed by setp.gt/selp.
; Attribute group #1 is defined outside this view (presumably it permits
; NaNs, going by the function name -- confirm against the attribute list).
define half @fma_f16_expanded_unsafe_with_nans(half %a, half %b, half %c) #1 {
; CHECK-LABEL: fma_f16_expanded_unsafe_with_nans(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<7>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_unsafe_with_nans_param_0];
; CHECK-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_unsafe_with_nans_param_1];
; CHECK-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_unsafe_with_nans_param_2];
; CHECK-NEXT:    fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-NEXT:    mov.b16 %rs5, 0x0000;
; CHECK-NEXT:    max.f16 %rs6, %rs4, %rs5;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs6;
; CHECK-NEXT:    ret;
;
; CHECK-FTZ-LABEL: fma_f16_expanded_unsafe_with_nans(
; CHECK-FTZ:       {
; CHECK-FTZ-NEXT:    .reg .b16 %rs<7>;
; CHECK-FTZ-EMPTY:
; CHECK-FTZ-NEXT:  // %bb.0:
; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_unsafe_with_nans_param_0];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_unsafe_with_nans_param_1];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_unsafe_with_nans_param_2];
; CHECK-FTZ-NEXT:    fma.rn.ftz.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-FTZ-NEXT:    mov.b16 %rs5, 0x0000;
; CHECK-FTZ-NEXT:    max.ftz.f16 %rs6, %rs4, %rs5;
; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs6;
; CHECK-FTZ-NEXT:    ret;
;
; CHECK-SM70-LABEL: fma_f16_expanded_unsafe_with_nans(
; CHECK-SM70:       {
; CHECK-SM70-NEXT:    .reg .pred %p<2>;
; CHECK-SM70-NEXT:    .reg .b16 %rs<7>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT:  // %bb.0:
; CHECK-SM70-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_unsafe_with_nans_param_0];
; CHECK-SM70-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_unsafe_with_nans_param_1];
; CHECK-SM70-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_unsafe_with_nans_param_2];
; CHECK-SM70-NEXT:    fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-SM70-NEXT:    mov.b16 %rs5, 0x0000;
; CHECK-SM70-NEXT:    setp.gt.f16 %p1, %rs4, %rs5;
; CHECK-SM70-NEXT:    selp.b16 %rs6, %rs4, 0x0000, %p1;
; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs6;
; CHECK-SM70-NEXT:    ret;
  %1 = fmul half %a, %b
  %2 = fadd half %1, %c
  %3 = fcmp ogt half %2, 0.0
  %4 = select i1 %3, half %2, half 0.0
  ret half %4
}
1728f8016feSHugh Delaney
; Unfused fmul + fadd on half whose result is clamped at zero through
; @llvm.maxnum.f16 rather than an fcmp/select pair. Expected lowering per the
; assertions below: fma.rn.relu.f16 on sm_80 (fma.rn.ftz.relu.f16 in the FTZ
; run); on sm_70 a plain fma.rn.f16 whose result is converted to f32, clamped
; with max.f32 and converted back to f16.
; Attribute group #0 and the @llvm.maxnum.f16 declaration are outside this
; view.
define half @fma_f16_expanded_maxnum_no_nans(half %a, half %b, half %c) #0 {
; CHECK-LABEL: fma_f16_expanded_maxnum_no_nans(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_maxnum_no_nans_param_0];
; CHECK-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_maxnum_no_nans_param_1];
; CHECK-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_maxnum_no_nans_param_2];
; CHECK-NEXT:    fma.rn.relu.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs4;
; CHECK-NEXT:    ret;
;
; CHECK-FTZ-LABEL: fma_f16_expanded_maxnum_no_nans(
; CHECK-FTZ:       {
; CHECK-FTZ-NEXT:    .reg .b16 %rs<5>;
; CHECK-FTZ-EMPTY:
; CHECK-FTZ-NEXT:  // %bb.0:
; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_maxnum_no_nans_param_0];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_maxnum_no_nans_param_1];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_maxnum_no_nans_param_2];
; CHECK-FTZ-NEXT:    fma.rn.ftz.relu.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs4;
; CHECK-FTZ-NEXT:    ret;
;
; CHECK-SM70-LABEL: fma_f16_expanded_maxnum_no_nans(
; CHECK-SM70:       {
; CHECK-SM70-NEXT:    .reg .b16 %rs<6>;
; CHECK-SM70-NEXT:    .reg .f32 %f<3>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT:  // %bb.0:
; CHECK-SM70-NEXT:    ld.param.b16 %rs1, [fma_f16_expanded_maxnum_no_nans_param_0];
; CHECK-SM70-NEXT:    ld.param.b16 %rs2, [fma_f16_expanded_maxnum_no_nans_param_1];
; CHECK-SM70-NEXT:    ld.param.b16 %rs3, [fma_f16_expanded_maxnum_no_nans_param_2];
; CHECK-SM70-NEXT:    fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
; CHECK-SM70-NEXT:    cvt.f32.f16 %f1, %rs4;
; CHECK-SM70-NEXT:    max.f32 %f2, %f1, 0f00000000;
; CHECK-SM70-NEXT:    cvt.rn.f16.f32 %rs5, %f2;
; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs5;
; CHECK-SM70-NEXT:    ret;
  %1 = fmul half %a, %b
  %2 = fadd half %1, %c
  %3 = call half @llvm.maxnum.f16(half %2, half 0.0)
  ret half %3
}
2188f8016feSHugh Delaney
; bfloat version of the fmul + fadd + `fcmp ogt`/select pattern, carrying
; attribute group #1. No run expects a relu form: both sm_80 runs expect
; fma.rn.bf16 followed by a separate max.bf16 (no .ftz variant appears for
; bf16). On sm_70 the arithmetic is expanded through f32: operands are widened
; with a 16-bit left shift, combined with fma.rn.f32, narrowed back with an
; integer bfe/add/selp sequence, and clamped via setp.gt.f32 + selp.b16.
; Attribute group #1 is defined outside this view.
define bfloat @fma_bf16_expanded_unsafe_with_nans(bfloat %a, bfloat %b, bfloat %c) #1 {
; CHECK-LABEL: fma_bf16_expanded_unsafe_with_nans(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<7>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b16 %rs1, [fma_bf16_expanded_unsafe_with_nans_param_0];
; CHECK-NEXT:    ld.param.b16 %rs2, [fma_bf16_expanded_unsafe_with_nans_param_1];
; CHECK-NEXT:    ld.param.b16 %rs3, [fma_bf16_expanded_unsafe_with_nans_param_2];
; CHECK-NEXT:    fma.rn.bf16 %rs4, %rs1, %rs2, %rs3;
; CHECK-NEXT:    mov.b16 %rs5, 0x0000;
; CHECK-NEXT:    max.bf16 %rs6, %rs4, %rs5;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs6;
; CHECK-NEXT:    ret;
;
; CHECK-FTZ-LABEL: fma_bf16_expanded_unsafe_with_nans(
; CHECK-FTZ:       {
; CHECK-FTZ-NEXT:    .reg .b16 %rs<7>;
; CHECK-FTZ-EMPTY:
; CHECK-FTZ-NEXT:  // %bb.0:
; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_bf16_expanded_unsafe_with_nans_param_0];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_bf16_expanded_unsafe_with_nans_param_1];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_bf16_expanded_unsafe_with_nans_param_2];
; CHECK-FTZ-NEXT:    fma.rn.bf16 %rs4, %rs1, %rs2, %rs3;
; CHECK-FTZ-NEXT:    mov.b16 %rs5, 0x0000;
; CHECK-FTZ-NEXT:    max.bf16 %rs6, %rs4, %rs5;
; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs6;
; CHECK-FTZ-NEXT:    ret;
;
; CHECK-SM70-LABEL: fma_bf16_expanded_unsafe_with_nans(
; CHECK-SM70:       {
; CHECK-SM70-NEXT:    .reg .pred %p<3>;
; CHECK-SM70-NEXT:    .reg .b16 %rs<3>;
; CHECK-SM70-NEXT:    .reg .b32 %r<14>;
; CHECK-SM70-NEXT:    .reg .f32 %f<6>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT:  // %bb.0:
; CHECK-SM70-NEXT:    ld.param.u16 %r1, [fma_bf16_expanded_unsafe_with_nans_param_2];
; CHECK-SM70-NEXT:    shl.b32 %r2, %r1, 16;
; CHECK-SM70-NEXT:    mov.b32 %f1, %r2;
; CHECK-SM70-NEXT:    ld.param.u16 %r3, [fma_bf16_expanded_unsafe_with_nans_param_1];
; CHECK-SM70-NEXT:    shl.b32 %r4, %r3, 16;
; CHECK-SM70-NEXT:    mov.b32 %f2, %r4;
; CHECK-SM70-NEXT:    ld.param.u16 %r5, [fma_bf16_expanded_unsafe_with_nans_param_0];
; CHECK-SM70-NEXT:    shl.b32 %r6, %r5, 16;
; CHECK-SM70-NEXT:    mov.b32 %f3, %r6;
; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
; CHECK-SM70-NEXT:    mov.b32 %r7, %f4;
; CHECK-SM70-NEXT:    bfe.u32 %r8, %r7, 16, 1;
; CHECK-SM70-NEXT:    add.s32 %r9, %r8, %r7;
; CHECK-SM70-NEXT:    add.s32 %r10, %r9, 32767;
; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
; CHECK-SM70-NEXT:    or.b32 %r11, %r7, 4194304;
; CHECK-SM70-NEXT:    selp.b32 %r12, %r11, %r10, %p1;
; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r12; }
; CHECK-SM70-NEXT:    and.b32 %r13, %r12, -65536;
; CHECK-SM70-NEXT:    mov.b32 %f5, %r13;
; CHECK-SM70-NEXT:    setp.gt.f32 %p2, %f5, 0f00000000;
; CHECK-SM70-NEXT:    selp.b16 %rs2, %rs1, 0x0000, %p2;
; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs2;
; CHECK-SM70-NEXT:    ret;
  %1 = fmul bfloat %a, %b
  %2 = fadd bfloat %1, %c
  %3 = fcmp ogt bfloat %2, 0.0
  %4 = select i1 %3, bfloat %2, bfloat 0.0
  ret bfloat %4
}
2868f8016feSHugh Delaney
; bfloat version of the fmul + fadd + `fcmp ogt`/select pattern, carrying
; attribute group #0. Both sm_80 runs expect a single fma.rn.relu.bf16; the
; FTZ run expects the same instruction because no .ftz form is emitted for
; bf16. On sm_70 the arithmetic is expanded through f32 (shift-by-16
; widening, fma.rn.f32, integer bfe/add/selp narrowing) and the clamp is a
; setp.gt.f32 + selp.b16 pair.
; Attribute group #0 is defined outside this view.
define bfloat @fma_bf16_expanded_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 {
; CHECK-LABEL: fma_bf16_expanded_no_nans(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b16 %rs1, [fma_bf16_expanded_no_nans_param_0];
; CHECK-NEXT:    ld.param.b16 %rs2, [fma_bf16_expanded_no_nans_param_1];
; CHECK-NEXT:    ld.param.b16 %rs3, [fma_bf16_expanded_no_nans_param_2];
; CHECK-NEXT:    fma.rn.relu.bf16 %rs4, %rs1, %rs2, %rs3;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs4;
; CHECK-NEXT:    ret;
;
; CHECK-FTZ-LABEL: fma_bf16_expanded_no_nans(
; CHECK-FTZ:       {
; CHECK-FTZ-NEXT:    .reg .b16 %rs<5>;
; CHECK-FTZ-EMPTY:
; CHECK-FTZ-NEXT:  // %bb.0:
; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_bf16_expanded_no_nans_param_0];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_bf16_expanded_no_nans_param_1];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_bf16_expanded_no_nans_param_2];
; CHECK-FTZ-NEXT:    fma.rn.relu.bf16 %rs4, %rs1, %rs2, %rs3;
; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs4;
; CHECK-FTZ-NEXT:    ret;
;
; CHECK-SM70-LABEL: fma_bf16_expanded_no_nans(
; CHECK-SM70:       {
; CHECK-SM70-NEXT:    .reg .pred %p<3>;
; CHECK-SM70-NEXT:    .reg .b16 %rs<3>;
; CHECK-SM70-NEXT:    .reg .b32 %r<14>;
; CHECK-SM70-NEXT:    .reg .f32 %f<6>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT:  // %bb.0:
; CHECK-SM70-NEXT:    ld.param.u16 %r1, [fma_bf16_expanded_no_nans_param_2];
; CHECK-SM70-NEXT:    shl.b32 %r2, %r1, 16;
; CHECK-SM70-NEXT:    mov.b32 %f1, %r2;
; CHECK-SM70-NEXT:    ld.param.u16 %r3, [fma_bf16_expanded_no_nans_param_1];
; CHECK-SM70-NEXT:    shl.b32 %r4, %r3, 16;
; CHECK-SM70-NEXT:    mov.b32 %f2, %r4;
; CHECK-SM70-NEXT:    ld.param.u16 %r5, [fma_bf16_expanded_no_nans_param_0];
; CHECK-SM70-NEXT:    shl.b32 %r6, %r5, 16;
; CHECK-SM70-NEXT:    mov.b32 %f3, %r6;
; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
; CHECK-SM70-NEXT:    mov.b32 %r7, %f4;
; CHECK-SM70-NEXT:    bfe.u32 %r8, %r7, 16, 1;
; CHECK-SM70-NEXT:    add.s32 %r9, %r8, %r7;
; CHECK-SM70-NEXT:    add.s32 %r10, %r9, 32767;
; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
; CHECK-SM70-NEXT:    or.b32 %r11, %r7, 4194304;
; CHECK-SM70-NEXT:    selp.b32 %r12, %r11, %r10, %p1;
; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r12; }
; CHECK-SM70-NEXT:    and.b32 %r13, %r12, -65536;
; CHECK-SM70-NEXT:    mov.b32 %f5, %r13;
; CHECK-SM70-NEXT:    setp.gt.f32 %p2, %f5, 0f00000000;
; CHECK-SM70-NEXT:    selp.b16 %rs2, %rs1, 0x0000, %p2;
; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs2;
; CHECK-SM70-NEXT:    ret;
  %1 = fmul bfloat %a, %b
  %2 = fadd bfloat %1, %c
  %3 = fcmp ogt bfloat %2, 0.0
  %4 = select i1 %3, bfloat %2, bfloat 0.0
  ret bfloat %4
}
3508f8016feSHugh Delaney
; FMA relu shouldn't be selected if the FMA operation has multiple uses.
; bfloat variant: %2 feeds both the zero clamp and a separate `fadd ..., 7.0`,
; so no run expects a relu form. What the assertions below expect instead:
;   - default sm_80 run: fma.rn.bf16 + max.bf16, with both fadds emitted as
;     fma.rn.bf16 using the multiplier 0x3F80 (bf16 1.0) and addend 0x40E0
;     (bf16 7.0) for the first one;
;   - FTZ run: fma.rn.bf16 + max.bf16, with both fadds performed in f32 via
;     add.ftz.f32 and narrowed back with cvt.rn.bf16.f32;
;   - sm_70 run: everything expanded through f32 with integer bfe/add/selp
;     narrowing sequences after each operation.
; Attribute group #0 is defined outside this view.
define bfloat @fma_bf16_expanded_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloat %c) #0 {
; CHECK-LABEL: fma_bf16_expanded_no_nans_multiple_uses_of_fma(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<11>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b16 %rs1, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_0];
; CHECK-NEXT:    ld.param.b16 %rs2, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_1];
; CHECK-NEXT:    ld.param.b16 %rs3, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_2];
; CHECK-NEXT:    fma.rn.bf16 %rs4, %rs1, %rs2, %rs3;
; CHECK-NEXT:    mov.b16 %rs5, 0x0000;
; CHECK-NEXT:    max.bf16 %rs6, %rs4, %rs5;
; CHECK-NEXT:    mov.b16 %rs7, 0x3F80;
; CHECK-NEXT:    mov.b16 %rs8, 0x40E0;
; CHECK-NEXT:    fma.rn.bf16 %rs9, %rs4, %rs7, %rs8;
; CHECK-NEXT:    fma.rn.bf16 %rs10, %rs6, %rs7, %rs9;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs10;
; CHECK-NEXT:    ret;
;
; CHECK-FTZ-LABEL: fma_bf16_expanded_no_nans_multiple_uses_of_fma(
; CHECK-FTZ:       {
; CHECK-FTZ-NEXT:    .reg .b16 %rs<9>;
; CHECK-FTZ-NEXT:    .reg .b32 %r<7>;
; CHECK-FTZ-NEXT:    .reg .f32 %f<6>;
; CHECK-FTZ-EMPTY:
; CHECK-FTZ-NEXT:  // %bb.0:
; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_0];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_1];
; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_2];
; CHECK-FTZ-NEXT:    fma.rn.bf16 %rs4, %rs1, %rs2, %rs3;
; CHECK-FTZ-NEXT:    mov.b16 %rs5, 0x0000;
; CHECK-FTZ-NEXT:    max.bf16 %rs6, %rs4, %rs5;
; CHECK-FTZ-NEXT:    cvt.u32.u16 %r1, %rs4;
; CHECK-FTZ-NEXT:    shl.b32 %r2, %r1, 16;
; CHECK-FTZ-NEXT:    mov.b32 %f1, %r2;
; CHECK-FTZ-NEXT:    add.ftz.f32 %f2, %f1, 0f40E00000;
; CHECK-FTZ-NEXT:    cvt.rn.bf16.f32 %rs7, %f2;
; CHECK-FTZ-NEXT:    cvt.u32.u16 %r3, %rs6;
; CHECK-FTZ-NEXT:    shl.b32 %r4, %r3, 16;
; CHECK-FTZ-NEXT:    mov.b32 %f3, %r4;
; CHECK-FTZ-NEXT:    cvt.u32.u16 %r5, %rs7;
; CHECK-FTZ-NEXT:    shl.b32 %r6, %r5, 16;
; CHECK-FTZ-NEXT:    mov.b32 %f4, %r6;
; CHECK-FTZ-NEXT:    add.ftz.f32 %f5, %f3, %f4;
; CHECK-FTZ-NEXT:    cvt.rn.bf16.f32 %rs8, %f5;
; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs8;
; CHECK-FTZ-NEXT:    ret;
;
; CHECK-SM70-LABEL: fma_bf16_expanded_no_nans_multiple_uses_of_fma(
; CHECK-SM70:       {
; CHECK-SM70-NEXT:    .reg .pred %p<5>;
; CHECK-SM70-NEXT:    .reg .b16 %rs<4>;
; CHECK-SM70-NEXT:    .reg .b32 %r<29>;
; CHECK-SM70-NEXT:    .reg .f32 %f<10>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT:  // %bb.0:
; CHECK-SM70-NEXT:    ld.param.u16 %r1, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_2];
; CHECK-SM70-NEXT:    shl.b32 %r2, %r1, 16;
; CHECK-SM70-NEXT:    mov.b32 %f1, %r2;
; CHECK-SM70-NEXT:    ld.param.u16 %r3, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_1];
; CHECK-SM70-NEXT:    shl.b32 %r4, %r3, 16;
; CHECK-SM70-NEXT:    mov.b32 %f2, %r4;
; CHECK-SM70-NEXT:    ld.param.u16 %r5, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_0];
; CHECK-SM70-NEXT:    shl.b32 %r6, %r5, 16;
; CHECK-SM70-NEXT:    mov.b32 %f3, %r6;
; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
; CHECK-SM70-NEXT:    mov.b32 %r7, %f4;
; CHECK-SM70-NEXT:    bfe.u32 %r8, %r7, 16, 1;
; CHECK-SM70-NEXT:    add.s32 %r9, %r8, %r7;
; CHECK-SM70-NEXT:    add.s32 %r10, %r9, 32767;
; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
; CHECK-SM70-NEXT:    or.b32 %r11, %r7, 4194304;
; CHECK-SM70-NEXT:    selp.b32 %r12, %r11, %r10, %p1;
; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r12; }
; CHECK-SM70-NEXT:    and.b32 %r13, %r12, -65536;
; CHECK-SM70-NEXT:    mov.b32 %f5, %r13;
; CHECK-SM70-NEXT:    setp.gt.f32 %p2, %f5, 0f00000000;
; CHECK-SM70-NEXT:    selp.b16 %rs2, %rs1, 0x0000, %p2;
; CHECK-SM70-NEXT:    add.f32 %f6, %f5, 0f40E00000;
; CHECK-SM70-NEXT:    mov.b32 %r14, %f6;
; CHECK-SM70-NEXT:    bfe.u32 %r15, %r14, 16, 1;
; CHECK-SM70-NEXT:    add.s32 %r16, %r15, %r14;
; CHECK-SM70-NEXT:    add.s32 %r17, %r16, 32767;
; CHECK-SM70-NEXT:    setp.nan.f32 %p3, %f6, %f6;
; CHECK-SM70-NEXT:    or.b32 %r18, %r14, 4194304;
; CHECK-SM70-NEXT:    selp.b32 %r19, %r18, %r17, %p3;
; CHECK-SM70-NEXT:    cvt.u32.u16 %r20, %rs2;
; CHECK-SM70-NEXT:    shl.b32 %r21, %r20, 16;
; CHECK-SM70-NEXT:    mov.b32 %f7, %r21;
; CHECK-SM70-NEXT:    and.b32 %r22, %r19, -65536;
; CHECK-SM70-NEXT:    mov.b32 %f8, %r22;
; CHECK-SM70-NEXT:    add.f32 %f9, %f7, %f8;
; CHECK-SM70-NEXT:    mov.b32 %r23, %f9;
; CHECK-SM70-NEXT:    bfe.u32 %r24, %r23, 16, 1;
; CHECK-SM70-NEXT:    add.s32 %r25, %r24, %r23;
; CHECK-SM70-NEXT:    add.s32 %r26, %r25, 32767;
; CHECK-SM70-NEXT:    setp.nan.f32 %p4, %f9, %f9;
; CHECK-SM70-NEXT:    or.b32 %r27, %r23, 4194304;
; CHECK-SM70-NEXT:    selp.b32 %r28, %r27, %r26, %p4;
; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs3}, %r28; }
; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs3;
; CHECK-SM70-NEXT:    ret;
  %1 = fmul bfloat %a, %b
  %2 = fadd bfloat %1, %c
  %3 = fcmp ogt bfloat %2, 0.0
  %4 = select i1 %3, bfloat %2, bfloat 0.0
  %5 = fadd bfloat %2, 7.0
  %6 = fadd bfloat %4, %5
  ret bfloat %6
}
4628f8016feSHugh Delaney
; Scalar bf16 test: fmul + fadd whose result is clamped with
; llvm.maxnum.bf16(x, 0.0).
; Expectations per RUN line (prefixes are set up at the top of the file):
;   CHECK      (sm_80):       a single fma.rn.relu.bf16.
;   CHECK-FTZ  (sm_80 + FTZ): the same fma.rn.relu.bf16, with no .ftz modifier.
;   CHECK-SM70 (sm_70):       no relu form; the operands are widened to f32
;                             (shl by 16), combined with fma.rn.f32, clamped
;                             with max.f32, and narrowed back to bf16 by integer
;                             sequences (bfe/add/add 32767, with a setp.nan/selp
;                             choosing an or-ed value when the input is NaN).
; NOTE(review): attribute group #0 is defined outside this view; presumably it
; is the no-NaNs attribute set the function name refers to - confirm.
4638f8016feSHugh Delaneydefine bfloat @fma_bf16_expanded_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 {
4648f8016feSHugh Delaney; CHECK-LABEL: fma_bf16_expanded_maxnum_no_nans(
4658f8016feSHugh Delaney; CHECK:       {
4668f8016feSHugh Delaney; CHECK-NEXT:    .reg .b16 %rs<5>;
4678f8016feSHugh Delaney; CHECK-EMPTY:
4688f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
4698f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs1, [fma_bf16_expanded_maxnum_no_nans_param_0];
4708f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs2, [fma_bf16_expanded_maxnum_no_nans_param_1];
4718f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b16 %rs3, [fma_bf16_expanded_maxnum_no_nans_param_2];
4728f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.relu.bf16 %rs4, %rs1, %rs2, %rs3;
4738f8016feSHugh Delaney; CHECK-NEXT:    st.param.b16 [func_retval0], %rs4;
4748f8016feSHugh Delaney; CHECK-NEXT:    ret;
4758f8016feSHugh Delaney;
4768f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_bf16_expanded_maxnum_no_nans(
4778f8016feSHugh Delaney; CHECK-FTZ:       {
4788f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b16 %rs<5>;
4798f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
4808f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
4818f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs1, [fma_bf16_expanded_maxnum_no_nans_param_0];
4828f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs2, [fma_bf16_expanded_maxnum_no_nans_param_1];
4838f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b16 %rs3, [fma_bf16_expanded_maxnum_no_nans_param_2];
4848f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.relu.bf16 %rs4, %rs1, %rs2, %rs3;
4858f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b16 [func_retval0], %rs4;
4868f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
4878f8016feSHugh Delaney;
4888f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_bf16_expanded_maxnum_no_nans(
4898f8016feSHugh Delaney; CHECK-SM70:       {
4908f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<3>;
491310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b16 %rs<2>;
4928f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b32 %r<20>;
4938f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<7>;
4948f8016feSHugh Delaney; CHECK-SM70-EMPTY:
4958f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
4968f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.u16 %r1, [fma_bf16_expanded_maxnum_no_nans_param_2];
4978f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r2, %r1, 16;
4988f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f1, %r2;
4998f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.u16 %r3, [fma_bf16_expanded_maxnum_no_nans_param_1];
5008f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r4, %r3, 16;
5018f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f2, %r4;
5028f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.u16 %r5, [fma_bf16_expanded_maxnum_no_nans_param_0];
5038f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r6, %r5, 16;
5048f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f3, %r6;
5058f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
5068f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r7, %f4;
5078f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r8, %r7, 16, 1;
5088f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r9, %r8, %r7;
5098f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r10, %r9, 32767;
5108f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
5118f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r11, %r7, 4194304;
5128f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r12, %r11, %r10, %p1;
5138f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r13, %r12, -65536;
5148f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f5, %r13;
5158f8016feSHugh Delaney; CHECK-SM70-NEXT:    max.f32 %f6, %f5, 0f00000000;
5168f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r14, %f6;
5178f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r15, %r14, 16, 1;
5188f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r16, %r15, %r14;
5198f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r17, %r16, 32767;
5208f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p2, %f6, %f6;
5218f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r18, %r14, 4194304;
5228f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r19, %r18, %r17, %p2;
5238f8016feSHugh Delaney; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r19; }
5248f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b16 [func_retval0], %rs1;
5258f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
5268f8016feSHugh Delaney  %1 = fmul bfloat %a, %b
5278f8016feSHugh Delaney  %2 = fadd bfloat %1, %c
5288f8016feSHugh Delaney  %3 = call bfloat @llvm.maxnum.bf16(bfloat %2, bfloat 0.0)
5298f8016feSHugh Delaney  ret bfloat %3
5308f8016feSHugh Delaney}
5318f8016feSHugh Delaney
; <2 x half> test: fmul + fadd followed by a compare-and-select clamp
; (fcmp ogt x, 0 ; select x, 0).
; Expectations per RUN line:
;   CHECK      (sm_80):       a single fma.rn.relu.f16x2.
;   CHECK-FTZ  (sm_80 + FTZ): fma.rn.ftz.relu.f16x2.
;   CHECK-SM70 (sm_70):       plain fma.rn.f16x2, then setp.gt.f16x2 yielding
;                             two predicates and a per-lane selp.b16 against 0.
; NOTE(review): attribute group #0 is defined outside this view; presumably it
; is the no-NaNs attribute set the function name refers to - confirm.
5328f8016feSHugh Delaneydefine <2 x half> @fma_f16x2_expanded_no_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
5338f8016feSHugh Delaney; CHECK-LABEL: fma_f16x2_expanded_no_nans(
5348f8016feSHugh Delaney; CHECK:       {
5358f8016feSHugh Delaney; CHECK-NEXT:    .reg .b32 %r<5>;
5368f8016feSHugh Delaney; CHECK-EMPTY:
5378f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
5388f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_no_nans_param_2];
5398f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_no_nans_param_1];
5408f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_no_nans_param_0];
5418f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.relu.f16x2 %r4, %r3, %r2, %r1;
5428f8016feSHugh Delaney; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
5438f8016feSHugh Delaney; CHECK-NEXT:    ret;
5448f8016feSHugh Delaney;
5458f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_f16x2_expanded_no_nans(
5468f8016feSHugh Delaney; CHECK-FTZ:       {
5478f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<5>;
5488f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
5498f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
5508f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_no_nans_param_2];
5518f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_no_nans_param_1];
5528f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_no_nans_param_0];
5538f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.ftz.relu.f16x2 %r4, %r3, %r2, %r1;
5548f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r4;
5558f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
5568f8016feSHugh Delaney;
5578f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_f16x2_expanded_no_nans(
5588f8016feSHugh Delaney; CHECK-SM70:       {
5598f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<3>;
5608f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b16 %rs<5>;
5618f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b32 %r<7>;
5628f8016feSHugh Delaney; CHECK-SM70-EMPTY:
5638f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
5648f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_no_nans_param_2];
5658f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_no_nans_param_1];
5668f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_no_nans_param_0];
5678f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f16x2 %r4, %r3, %r2, %r1;
5688f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r5, 0;
5698f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f16x2 %p1|%p2, %r4, %r5;
5708f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r4;
5718f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b16 %rs3, %rs2, 0x0000, %p2;
5728f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b16 %rs4, %rs1, 0x0000, %p1;
5738f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r6, {%rs4, %rs3};
5748f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r6;
5758f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
5768f8016feSHugh Delaney  %1 = fmul <2 x half> %a, %b
5778f8016feSHugh Delaney  %2 = fadd <2 x half> %1, %c
5788f8016feSHugh Delaney  %3 = fcmp ogt <2 x half> %2, <half 0.0, half 0.0>
5798f8016feSHugh Delaney  %4 = select <2 x i1> %3, <2 x half> %2, <2 x half> <half 0.0, half 0.0>
5808f8016feSHugh Delaney  ret <2 x half> %4
5818f8016feSHugh Delaney}
5828f8016feSHugh Delaney
5838f8016feSHugh Delaney; FMA relu shouldn't be selected if the FMA operation has multiple uses
; <2 x half> negative test: the fmul + fadd result (%2) feeds both the
; compare-and-select clamp and a second fadd with <7.0, 7.0>, so the unclamped
; value must remain available.
; Expectations per RUN line:
;   CHECK      (sm_80):       fma.rn.f16x2 followed by a separate max.f16x2;
;                             no .relu form.
;   CHECK-FTZ  (sm_80 + FTZ): the same shape with .ftz on fma, max and add.
;   CHECK-SM70 (sm_70):       fma.rn.f16x2, then setp.gt.f16x2 + per-lane
;                             selp.b16 for the clamp.
; The immediate 1191200512 in the expected output is 0x47004700, i.e. the half
; bit pattern of 7.0 (0x4700) in both lanes.
; NOTE(review): attribute group #0 is defined outside this view; presumably it
; is the no-NaNs attribute set the function name refers to - confirm.
5848f8016feSHugh Delaneydefine <2 x half> @fma_f16x2_expanded_no_nans_multiple_uses_of_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
5858f8016feSHugh Delaney; CHECK-LABEL: fma_f16x2_expanded_no_nans_multiple_uses_of_fma(
5868f8016feSHugh Delaney; CHECK:       {
5878f8016feSHugh Delaney; CHECK-NEXT:    .reg .b32 %r<10>;
5888f8016feSHugh Delaney; CHECK-EMPTY:
5898f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
5908f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_2];
5918f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_1];
5928f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_0];
5938f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.f16x2 %r4, %r3, %r2, %r1;
5948f8016feSHugh Delaney; CHECK-NEXT:    mov.b32 %r5, 0;
5958f8016feSHugh Delaney; CHECK-NEXT:    max.f16x2 %r6, %r4, %r5;
5968f8016feSHugh Delaney; CHECK-NEXT:    mov.b32 %r7, 1191200512;
5978f8016feSHugh Delaney; CHECK-NEXT:    add.f16x2 %r8, %r4, %r7;
5988f8016feSHugh Delaney; CHECK-NEXT:    add.f16x2 %r9, %r6, %r8;
5998f8016feSHugh Delaney; CHECK-NEXT:    st.param.b32 [func_retval0], %r9;
6008f8016feSHugh Delaney; CHECK-NEXT:    ret;
6018f8016feSHugh Delaney;
6028f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_f16x2_expanded_no_nans_multiple_uses_of_fma(
6038f8016feSHugh Delaney; CHECK-FTZ:       {
6048f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<10>;
6058f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
6068f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
6078f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_2];
6088f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_1];
6098f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_0];
6108f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.ftz.f16x2 %r4, %r3, %r2, %r1;
6118f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %r5, 0;
6128f8016feSHugh Delaney; CHECK-FTZ-NEXT:    max.ftz.f16x2 %r6, %r4, %r5;
6138f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %r7, 1191200512;
6148f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.ftz.f16x2 %r8, %r4, %r7;
6158f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.ftz.f16x2 %r9, %r6, %r8;
6168f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r9;
6178f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
6188f8016feSHugh Delaney;
6198f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_f16x2_expanded_no_nans_multiple_uses_of_fma(
6208f8016feSHugh Delaney; CHECK-SM70:       {
6218f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<3>;
6228f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b16 %rs<5>;
6238f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b32 %r<10>;
6248f8016feSHugh Delaney; CHECK-SM70-EMPTY:
6258f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
6268f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_2];
6278f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_1];
6288f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_no_nans_multiple_uses_of_fma_param_0];
6298f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f16x2 %r4, %r3, %r2, %r1;
6308f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r5, 0;
6318f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f16x2 %p1|%p2, %r4, %r5;
6328f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r4;
6338f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b16 %rs3, %rs2, 0x0000, %p2;
6348f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b16 %rs4, %rs1, 0x0000, %p1;
6358f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r6, {%rs4, %rs3};
6368f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r7, 1191200512;
6378f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.f16x2 %r8, %r4, %r7;
6388f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.f16x2 %r9, %r6, %r8;
6398f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r9;
6408f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
6418f8016feSHugh Delaney  %1 = fmul <2 x half> %a, %b
6428f8016feSHugh Delaney  %2 = fadd <2 x half> %1, %c
6438f8016feSHugh Delaney  %3 = fcmp ogt <2 x half> %2, <half 0.0, half 0.0>
6448f8016feSHugh Delaney  %4 = select <2 x i1> %3, <2 x half> %2, <2 x half> <half 0.0, half 0.0>
6458f8016feSHugh Delaney  %5 = fadd <2 x half> %2, <half 7.0, half 7.0>
6468f8016feSHugh Delaney  %6 = fadd <2 x half> %4, %5
6478f8016feSHugh Delaney  ret <2 x half> %6
6488f8016feSHugh Delaney}
6498f8016feSHugh Delaney
; <2 x half> negative test: the IR body is the fmul + fadd + compare-and-select
; clamp pattern, but the function carries attribute group #1 instead of #0.
; Expectations per RUN line:
;   CHECK      (sm_80):       fma.rn.f16x2 followed by max.f16x2; no .relu form.
;   CHECK-FTZ  (sm_80 + FTZ): fma.rn.ftz.f16x2 followed by max.ftz.f16x2.
;   CHECK-SM70 (sm_70):       fma.rn.f16x2, then setp.gt.f16x2 + per-lane
;                             selp.b16 against 0.
; NOTE(review): attribute group #1 is defined outside this view; presumably it
; permits contraction without asserting the absence of NaNs, as the function
; name suggests - confirm.
6508f8016feSHugh Delaneydefine <2 x half> @fma_f16x2_expanded_unsafe_with_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c) #1 {
6518f8016feSHugh Delaney; CHECK-LABEL: fma_f16x2_expanded_unsafe_with_nans(
6528f8016feSHugh Delaney; CHECK:       {
6538f8016feSHugh Delaney; CHECK-NEXT:    .reg .b32 %r<7>;
6548f8016feSHugh Delaney; CHECK-EMPTY:
6558f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
6568f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_unsafe_with_nans_param_2];
6578f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_unsafe_with_nans_param_1];
6588f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_unsafe_with_nans_param_0];
6598f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.f16x2 %r4, %r3, %r2, %r1;
6608f8016feSHugh Delaney; CHECK-NEXT:    mov.b32 %r5, 0;
6618f8016feSHugh Delaney; CHECK-NEXT:    max.f16x2 %r6, %r4, %r5;
6628f8016feSHugh Delaney; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
6638f8016feSHugh Delaney; CHECK-NEXT:    ret;
6648f8016feSHugh Delaney;
6658f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_f16x2_expanded_unsafe_with_nans(
6668f8016feSHugh Delaney; CHECK-FTZ:       {
6678f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<7>;
6688f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
6698f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
6708f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_unsafe_with_nans_param_2];
6718f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_unsafe_with_nans_param_1];
6728f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_unsafe_with_nans_param_0];
6738f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.ftz.f16x2 %r4, %r3, %r2, %r1;
6748f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %r5, 0;
6758f8016feSHugh Delaney; CHECK-FTZ-NEXT:    max.ftz.f16x2 %r6, %r4, %r5;
6768f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r6;
6778f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
6788f8016feSHugh Delaney;
6798f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_f16x2_expanded_unsafe_with_nans(
6808f8016feSHugh Delaney; CHECK-SM70:       {
6818f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<3>;
6828f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b16 %rs<5>;
6838f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b32 %r<7>;
6848f8016feSHugh Delaney; CHECK-SM70-EMPTY:
6858f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
6868f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_unsafe_with_nans_param_2];
6878f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_unsafe_with_nans_param_1];
6888f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_unsafe_with_nans_param_0];
6898f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f16x2 %r4, %r3, %r2, %r1;
6908f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r5, 0;
6918f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f16x2 %p1|%p2, %r4, %r5;
6928f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r4;
6938f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b16 %rs3, %rs2, 0x0000, %p2;
6948f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b16 %rs4, %rs1, 0x0000, %p1;
6958f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r6, {%rs4, %rs3};
6968f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r6;
6978f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
6988f8016feSHugh Delaney  %1 = fmul <2 x half> %a, %b
6998f8016feSHugh Delaney  %2 = fadd <2 x half> %1, %c
7008f8016feSHugh Delaney  %3 = fcmp ogt <2 x half> %2, <half 0.0, half 0.0>
7018f8016feSHugh Delaney  %4 = select <2 x i1> %3, <2 x half> %2, <2 x half> <half 0.0, half 0.0>
7028f8016feSHugh Delaney  ret <2 x half> %4
7038f8016feSHugh Delaney}
7048f8016feSHugh Delaney
; <2 x half> test: fmul + fadd whose result is clamped with a maxnum intrinsic
; call against <0.0, 0.0>.
; Expectations per RUN line:
;   CHECK      (sm_80):       a single fma.rn.relu.f16x2.
;   CHECK-FTZ  (sm_80 + FTZ): fma.rn.ftz.relu.f16x2.
;   CHECK-SM70 (sm_70):       fma.rn.f16x2, then each lane is unpacked,
;                             converted to f32 (cvt.f32.f16), clamped with
;                             max.f32, converted back (cvt.rn.f16.f32) and
;                             repacked.
; NOTE(review): attribute group #0 is defined outside this view; presumably it
; is the no-NaNs attribute set the function name refers to - confirm.
7058f8016feSHugh Delaneydefine <2 x half> @fma_f16x2_expanded_maxnum_no_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
7068f8016feSHugh Delaney; CHECK-LABEL: fma_f16x2_expanded_maxnum_no_nans(
7078f8016feSHugh Delaney; CHECK:       {
7088f8016feSHugh Delaney; CHECK-NEXT:    .reg .b32 %r<5>;
7098f8016feSHugh Delaney; CHECK-EMPTY:
7108f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
7118f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_maxnum_no_nans_param_2];
7128f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_maxnum_no_nans_param_1];
7138f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_maxnum_no_nans_param_0];
7148f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.relu.f16x2 %r4, %r3, %r2, %r1;
7158f8016feSHugh Delaney; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
7168f8016feSHugh Delaney; CHECK-NEXT:    ret;
7178f8016feSHugh Delaney;
7188f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_f16x2_expanded_maxnum_no_nans(
7198f8016feSHugh Delaney; CHECK-FTZ:       {
7208f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<5>;
7218f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
7228f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
7238f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_maxnum_no_nans_param_2];
7248f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_maxnum_no_nans_param_1];
7258f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_maxnum_no_nans_param_0];
7268f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.ftz.relu.f16x2 %r4, %r3, %r2, %r1;
7278f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r4;
7288f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
7298f8016feSHugh Delaney;
7308f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_f16x2_expanded_maxnum_no_nans(
7318f8016feSHugh Delaney; CHECK-SM70:       {
7328f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b16 %rs<5>;
7338f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b32 %r<6>;
7348f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<5>;
7358f8016feSHugh Delaney; CHECK-SM70-EMPTY:
7368f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
7378f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_f16x2_expanded_maxnum_no_nans_param_2];
7388f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_f16x2_expanded_maxnum_no_nans_param_1];
7398f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_f16x2_expanded_maxnum_no_nans_param_0];
7408f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f16x2 %r4, %r3, %r2, %r1;
7418f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r4;
7428f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.f32.f16 %f1, %rs2;
7438f8016feSHugh Delaney; CHECK-SM70-NEXT:    max.f32 %f2, %f1, 0f00000000;
7448f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.rn.f16.f32 %rs3, %f2;
7458f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.f32.f16 %f3, %rs1;
7468f8016feSHugh Delaney; CHECK-SM70-NEXT:    max.f32 %f4, %f3, 0f00000000;
7478f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.rn.f16.f32 %rs4, %f4;
7488f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r5, {%rs4, %rs3};
7498f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r5;
7508f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
7518f8016feSHugh Delaney  %1 = fmul <2 x half> %a, %b
7528f8016feSHugh Delaney  %2 = fadd <2 x half> %1, %c
7538f8016feSHugh Delaney  %3 = call <2 x half> @llvm.maxnum.f16x2(<2 x half> %2, <2 x half> <half 0.0, half 0.0>)
7548f8016feSHugh Delaney  ret <2 x half> %3
7558f8016feSHugh Delaney}
7568f8016feSHugh Delaney
; <2 x bfloat> negative test: fmul + fadd + compare-and-select clamp under
; attribute group #1.
; Expectations per RUN line:
;   CHECK      (sm_80):       fma.rn.bf16x2 followed by max.bf16x2; no .relu
;                             form.
;   CHECK-FTZ  (sm_80 + FTZ): identical output; no .ftz modifier appears on the
;                             bf16 instructions.
;   CHECK-SM70 (sm_70):       each lane is widened to f32 (shl by 16), combined
;                             with fma.rn.f32 and narrowed back to bf16 by an
;                             integer sequence; the clamp is a setp.gt.f32 on
;                             the narrowed value plus a selp.b16 per lane.
; NOTE(review): attribute group #1 is defined outside this view; presumably it
; permits contraction without asserting the absence of NaNs, as the function
; name suggests - confirm.
7578f8016feSHugh Delaneydefine <2 x bfloat> @fma_bf16x2_expanded_unsafe_with_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) #1 {
7588f8016feSHugh Delaney; CHECK-LABEL: fma_bf16x2_expanded_unsafe_with_nans(
7598f8016feSHugh Delaney; CHECK:       {
7608f8016feSHugh Delaney; CHECK-NEXT:    .reg .b32 %r<7>;
7618f8016feSHugh Delaney; CHECK-EMPTY:
7628f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
7638f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_unsafe_with_nans_param_2];
7648f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_unsafe_with_nans_param_1];
7658f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_unsafe_with_nans_param_0];
7668f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.bf16x2 %r4, %r3, %r2, %r1;
7678f8016feSHugh Delaney; CHECK-NEXT:    mov.b32 %r5, 0;
7688f8016feSHugh Delaney; CHECK-NEXT:    max.bf16x2 %r6, %r4, %r5;
7698f8016feSHugh Delaney; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
7708f8016feSHugh Delaney; CHECK-NEXT:    ret;
7718f8016feSHugh Delaney;
7728f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_bf16x2_expanded_unsafe_with_nans(
7738f8016feSHugh Delaney; CHECK-FTZ:       {
7748f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<7>;
7758f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
7768f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
7778f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_unsafe_with_nans_param_2];
7788f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_unsafe_with_nans_param_1];
7798f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_unsafe_with_nans_param_0];
7808f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.bf16x2 %r4, %r3, %r2, %r1;
7818f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %r5, 0;
7828f8016feSHugh Delaney; CHECK-FTZ-NEXT:    max.bf16x2 %r6, %r4, %r5;
7838f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r6;
7848f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
7858f8016feSHugh Delaney;
7868f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_bf16x2_expanded_unsafe_with_nans(
7878f8016feSHugh Delaney; CHECK-SM70:       {
7888f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<5>;
789310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b16 %rs<11>;
7908f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b32 %r<31>;
7918f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<11>;
7928f8016feSHugh Delaney; CHECK-SM70-EMPTY:
7938f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
7948f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_unsafe_with_nans_param_0];
7958f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_unsafe_with_nans_param_1];
7968f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_unsafe_with_nans_param_2];
7978f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r3;
7988f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r4, %rs1;
7998f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r5, %r4, 16;
8008f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f1, %r5;
801310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs3, %rs4}, %r2;
802310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r6, %rs3;
8038f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r7, %r6, 16;
8048f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f2, %r7;
805310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs5, %rs6}, %r1;
806310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r8, %rs5;
8078f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r9, %r8, 16;
8088f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f3, %r9;
8098f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
8108f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r10, %f4;
8118f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r11, %r10, 16, 1;
8128f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r12, %r11, %r10;
8138f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r13, %r12, 32767;
8148f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
8158f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r14, %r10, 4194304;
8168f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r15, %r14, %r13, %p1;
817310e7987SAlex MacLean; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r15; }
8188f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r16, %rs2;
8198f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r17, %r16, 16;
8208f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f5, %r17;
821310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r18, %rs4;
8228f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r19, %r18, 16;
8238f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f6, %r19;
824310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r20, %rs6;
8258f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r21, %r20, 16;
8268f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f7, %r21;
8278f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f8, %f7, %f6, %f5;
8288f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r22, %f8;
8298f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r23, %r22, 16, 1;
8308f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r24, %r23, %r22;
8318f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r25, %r24, 32767;
8328f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p2, %f8, %f8;
8338f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r26, %r22, 4194304;
8348f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r27, %r26, %r25, %p2;
835310e7987SAlex MacLean; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs8}, %r27; }
8368f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r28, %r15, -65536;
8378f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f9, %r28;
8388f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f32 %p3, %f9, 0f00000000;
8398f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r29, %r27, -65536;
8408f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f10, %r29;
8418f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f32 %p4, %f10, 0f00000000;
842310e7987SAlex MacLean; CHECK-SM70-NEXT:    selp.b16 %rs9, %rs8, 0x0000, %p4;
843310e7987SAlex MacLean; CHECK-SM70-NEXT:    selp.b16 %rs10, %rs7, 0x0000, %p3;
844310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 %r30, {%rs10, %rs9};
8458f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r30;
8468f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
8478f8016feSHugh Delaney  %1 = fmul <2 x bfloat> %a, %b
8488f8016feSHugh Delaney  %2 = fadd <2 x bfloat> %1, %c
8498f8016feSHugh Delaney  %3 = fcmp ogt <2 x bfloat> %2, <bfloat 0.0, bfloat 0.0>
8508f8016feSHugh Delaney  %4 = select <2 x i1> %3, <2 x bfloat> %2, <2 x bfloat> <bfloat 0.0, bfloat 0.0>
8518f8016feSHugh Delaney  ret <2 x bfloat> %4
8528f8016feSHugh Delaney}
8538f8016feSHugh Delaney
; <2 x bfloat> test: fmul + fadd followed by a compare-and-select clamp
; (fcmp ogt x, 0 ; select x, 0).
; Expectations per RUN line:
;   CHECK      (sm_80):       a single fma.rn.relu.bf16x2.
;   CHECK-FTZ  (sm_80 + FTZ): the same fma.rn.relu.bf16x2, with no .ftz
;                             modifier.
;   CHECK-SM70 (sm_70):       no relu form; each lane is widened to f32 (shl by
;                             16), combined with fma.rn.f32 and narrowed back
;                             to bf16 by an integer sequence; the clamp is a
;                             setp.gt.f32 plus a selp.b16 per lane.
; NOTE(review): attribute group #0 is defined outside this view; presumably it
; is the no-NaNs attribute set the function name refers to - confirm.
8548f8016feSHugh Delaneydefine <2 x bfloat> @fma_bf16x2_expanded_no_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) #0 {
8558f8016feSHugh Delaney; CHECK-LABEL: fma_bf16x2_expanded_no_nans(
8568f8016feSHugh Delaney; CHECK:       {
8578f8016feSHugh Delaney; CHECK-NEXT:    .reg .b32 %r<5>;
8588f8016feSHugh Delaney; CHECK-EMPTY:
8598f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
8608f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_no_nans_param_2];
8618f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_no_nans_param_1];
8628f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_no_nans_param_0];
8638f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.relu.bf16x2 %r4, %r3, %r2, %r1;
8648f8016feSHugh Delaney; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
8658f8016feSHugh Delaney; CHECK-NEXT:    ret;
8668f8016feSHugh Delaney;
8678f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_bf16x2_expanded_no_nans(
8688f8016feSHugh Delaney; CHECK-FTZ:       {
8698f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<5>;
8708f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
8718f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
8728f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_no_nans_param_2];
8738f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_no_nans_param_1];
8748f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_no_nans_param_0];
8758f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.relu.bf16x2 %r4, %r3, %r2, %r1;
8768f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r4;
8778f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
8788f8016feSHugh Delaney;
8798f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_bf16x2_expanded_no_nans(
8808f8016feSHugh Delaney; CHECK-SM70:       {
8818f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<5>;
882310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b16 %rs<11>;
8838f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .b32 %r<31>;
8848f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<11>;
8858f8016feSHugh Delaney; CHECK-SM70-EMPTY:
8868f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
8878f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_no_nans_param_0];
8888f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_no_nans_param_1];
8898f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_no_nans_param_2];
8908f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r3;
8918f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r4, %rs1;
8928f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r5, %r4, 16;
8938f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f1, %r5;
894310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs3, %rs4}, %r2;
895310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r6, %rs3;
8968f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r7, %r6, 16;
8978f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f2, %r7;
898310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs5, %rs6}, %r1;
899310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r8, %rs5;
9008f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r9, %r8, 16;
9018f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f3, %r9;
9028f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
9038f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r10, %f4;
9048f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r11, %r10, 16, 1;
9058f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r12, %r11, %r10;
9068f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r13, %r12, 32767;
9078f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
9088f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r14, %r10, 4194304;
9098f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r15, %r14, %r13, %p1;
910310e7987SAlex MacLean; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r15; }
9118f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r16, %rs2;
9128f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r17, %r16, 16;
9138f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f5, %r17;
914310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r18, %rs4;
9158f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r19, %r18, 16;
9168f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f6, %r19;
917310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r20, %rs6;
9188f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r21, %r20, 16;
9198f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f7, %r21;
9208f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f8, %f7, %f6, %f5;
9218f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r22, %f8;
9228f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r23, %r22, 16, 1;
9238f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r24, %r23, %r22;
9248f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r25, %r24, 32767;
9258f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p2, %f8, %f8;
9268f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r26, %r22, 4194304;
9278f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r27, %r26, %r25, %p2;
928310e7987SAlex MacLean; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs8}, %r27; }
9298f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r28, %r15, -65536;
9308f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f9, %r28;
9318f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f32 %p3, %f9, 0f00000000;
9328f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r29, %r27, -65536;
9338f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f10, %r29;
9348f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f32 %p4, %f10, 0f00000000;
935310e7987SAlex MacLean; CHECK-SM70-NEXT:    selp.b16 %rs9, %rs8, 0x0000, %p4;
936310e7987SAlex MacLean; CHECK-SM70-NEXT:    selp.b16 %rs10, %rs7, 0x0000, %p3;
937310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 %r30, {%rs10, %rs9};
9388f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r30;
9398f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
9408f8016feSHugh Delaney  %1 = fmul <2 x bfloat> %a, %b
9418f8016feSHugh Delaney  %2 = fadd <2 x bfloat> %1, %c
9428f8016feSHugh Delaney  %3 = fcmp ogt <2 x bfloat> %2, <bfloat 0.0, bfloat 0.0>
9438f8016feSHugh Delaney  %4 = select <2 x i1> %3, <2 x bfloat> %2, <2 x bfloat> <bfloat 0.0, bfloat 0.0>
9448f8016feSHugh Delaney  ret <2 x bfloat> %4
9458f8016feSHugh Delaney}
9468f8016feSHugh Delaney
9478f8016feSHugh Delaney; FMA relu shouldn't be selected if the FMA operation has multiple uses
; In the IR body at the end of this function, %2 (the fmul + fadd result) is
; consumed twice: once by the fcmp ogt / select against zero and once by a
; separate fadd with <7.0, 7.0>.
;
; What the generated assertions below pin down, per RUN configuration:
;  * sm_80 (default) and sm_80 with FTZ: a plain fma.rn.bf16x2 followed by
;    max.bf16x2 against 0 -- no fma.rn.relu.bf16x2 is expected.
;  * sm_80 (default) only: both fadds show up as fma.rn.bf16x2 with the
;    immediates 1065369472 (0x3F803F80, i.e. bf16x2 <1.0, 1.0>) as multiplier
;    and 1088438496 (0x40E040E0, i.e. bf16x2 <7.0, 7.0>) as the first addend.
;  * sm_80 with FTZ: the fadds are done per lane as add.ftz.f32 (0f40E00000 is
;    7.0 as f32) and converted back with cvt.rn.bf16.f32 / cvt.rn.bf16x2.f32.
;  * sm_70: all arithmetic is done per lane in f32 (fma.rn.f32, add.f32), each
;    result is narrowed by an integer bfe/add/setp.nan/or/selp sequence, and
;    the two lanes are combined with prmt.b32.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing them by hand.
9488f8016feSHugh Delaneydefine <2 x bfloat> @fma_bf16x2_expanded_no_nans_multiple_uses_of_fma(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) #0 {
9498f8016feSHugh Delaney; CHECK-LABEL: fma_bf16x2_expanded_no_nans_multiple_uses_of_fma(
9508f8016feSHugh Delaney; CHECK:       {
951*5e5fd0e6Speterbell10; CHECK-NEXT:    .reg .b32 %r<11>;
9528f8016feSHugh Delaney; CHECK-EMPTY:
9538f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
9548f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_2];
9558f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_1];
9568f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_0];
9578f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.bf16x2 %r4, %r3, %r2, %r1;
9588f8016feSHugh Delaney; CHECK-NEXT:    mov.b32 %r5, 0;
9598f8016feSHugh Delaney; CHECK-NEXT:    max.bf16x2 %r6, %r4, %r5;
960*5e5fd0e6Speterbell10; CHECK-NEXT:    mov.b32 %r7, 1065369472;
961*5e5fd0e6Speterbell10; CHECK-NEXT:    mov.b32 %r8, 1088438496;
962*5e5fd0e6Speterbell10; CHECK-NEXT:    fma.rn.bf16x2 %r9, %r4, %r7, %r8;
963*5e5fd0e6Speterbell10; CHECK-NEXT:    fma.rn.bf16x2 %r10, %r6, %r7, %r9;
964*5e5fd0e6Speterbell10; CHECK-NEXT:    st.param.b32 [func_retval0], %r10;
9658f8016feSHugh Delaney; CHECK-NEXT:    ret;
9668f8016feSHugh Delaney;
9678f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_bf16x2_expanded_no_nans_multiple_uses_of_fma(
9688f8016feSHugh Delaney; CHECK-FTZ:       {
969310e7987SAlex MacLean; CHECK-FTZ-NEXT:    .reg .b16 %rs<7>;
9708f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<20>;
9718f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .f32 %f<11>;
9728f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
9738f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
9748f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_2];
9758f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_1];
9768f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_0];
9778f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.bf16x2 %r4, %r3, %r2, %r1;
9788f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %r5, 0;
9798f8016feSHugh Delaney; CHECK-FTZ-NEXT:    max.bf16x2 %r6, %r4, %r5;
9808f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 {%rs1, %rs2}, %r4;
9814b24ab4bSAlex MacLean; CHECK-FTZ-NEXT:    cvt.u32.u16 %r7, %rs2;
9828f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r8, %r7, 16;
9838f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f1, %r8;
9848f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.ftz.f32 %f2, %f1, 0f40E00000;
985310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.rn.bf16.f32 %rs3, %f2;
9864b24ab4bSAlex MacLean; CHECK-FTZ-NEXT:    cvt.u32.u16 %r9, %rs1;
9878f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r10, %r9, 16;
9888f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f3, %r10;
9898f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.ftz.f32 %f4, %f3, 0f40E00000;
990310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.rn.bf16.f32 %rs4, %f4;
991310e7987SAlex MacLean; CHECK-FTZ-NEXT:    mov.b32 {%rs5, %rs6}, %r6;
992310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.u32.u16 %r11, %rs5;
9938f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r12, %r11, 16;
9948f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f5, %r12;
995310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.u32.u16 %r13, %rs4;
9968f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r14, %r13, 16;
9978f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f6, %r14;
9988f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.ftz.f32 %f7, %f5, %f6;
999310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.u32.u16 %r15, %rs6;
10008f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r16, %r15, 16;
10018f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f8, %r16;
1002310e7987SAlex MacLean; CHECK-FTZ-NEXT:    cvt.u32.u16 %r17, %rs3;
10038f8016feSHugh Delaney; CHECK-FTZ-NEXT:    shl.b32 %r18, %r17, 16;
10048f8016feSHugh Delaney; CHECK-FTZ-NEXT:    mov.b32 %f9, %r18;
10058f8016feSHugh Delaney; CHECK-FTZ-NEXT:    add.ftz.f32 %f10, %f8, %f9;
10064b24ab4bSAlex MacLean; CHECK-FTZ-NEXT:    cvt.rn.bf16x2.f32 %r19, %f10, %f7;
10078f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r19;
10088f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
10098f8016feSHugh Delaney;
10108f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_bf16x2_expanded_no_nans_multiple_uses_of_fma(
10118f8016feSHugh Delaney; CHECK-SM70:       {
10128f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<9>;
1013310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b16 %rs<11>;
1014310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b32 %r<61>;
10158f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<19>;
10168f8016feSHugh Delaney; CHECK-SM70-EMPTY:
10178f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
10188f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_0];
10198f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_1];
10208f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_no_nans_multiple_uses_of_fma_param_2];
10218f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r3;
10228f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r4, %rs2;
10238f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r5, %r4, 16;
10248f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f1, %r5;
1025310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs3, %rs4}, %r2;
1026310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r6, %rs4;
10278f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r7, %r6, 16;
10288f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f2, %r7;
1029310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs5, %rs6}, %r1;
1030310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r8, %rs6;
10318f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r9, %r8, 16;
10328f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f3, %r9;
10338f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
10348f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r10, %f4;
10358f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r11, %r10, 16, 1;
10368f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r12, %r11, %r10;
10378f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r13, %r12, 32767;
10388f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
10398f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r14, %r10, 4194304;
10408f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r15, %r14, %r13, %p1;
1041310e7987SAlex MacLean; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r15; }
10428f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r16, %rs1;
10438f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r17, %r16, 16;
10448f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f5, %r17;
1045310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r18, %rs3;
10468f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r19, %r18, 16;
10478f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f6, %r19;
1048310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r20, %rs5;
10498f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r21, %r20, 16;
10508f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f7, %r21;
10518f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f8, %f7, %f6, %f5;
10528f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r22, %f8;
10538f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r23, %r22, 16, 1;
10548f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r24, %r23, %r22;
10558f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r25, %r24, 32767;
10568f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p2, %f8, %f8;
10578f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r26, %r22, 4194304;
10588f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r27, %r26, %r25, %p2;
1059310e7987SAlex MacLean; CHECK-SM70-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs8}, %r27; }
10608f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r28, %r15, -65536;
10618f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f9, %r28;
10628f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f32 %p3, %f9, 0f00000000;
10638f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r29, %r27, -65536;
10648f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f10, %r29;
10658f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.gt.f32 %p4, %f10, 0f00000000;
1066310e7987SAlex MacLean; CHECK-SM70-NEXT:    selp.b16 %rs9, %rs8, 0x0000, %p4;
1067310e7987SAlex MacLean; CHECK-SM70-NEXT:    selp.b16 %rs10, %rs7, 0x0000, %p3;
10688f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.f32 %f11, %f10, 0f40E00000;
10698f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r30, %f11;
10708f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r31, %r30, 16, 1;
10718f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r32, %r31, %r30;
10728f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r33, %r32, 32767;
10738f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p5, %f11, %f11;
10748f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r34, %r30, 4194304;
10758f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r35, %r34, %r33, %p5;
10768f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.f32 %f12, %f9, 0f40E00000;
10778f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r36, %f12;
10788f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r37, %r36, 16, 1;
10798f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r38, %r37, %r36;
10808f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r39, %r38, 32767;
10818f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p6, %f12, %f12;
10828f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r40, %r36, 4194304;
10838f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r41, %r40, %r39, %p6;
1084310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r42, %rs10;
10858f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r43, %r42, 16;
10868f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f13, %r43;
10878f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r44, %r41, -65536;
10888f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f14, %r44;
10898f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.f32 %f15, %f13, %f14;
10908f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r45, %f15;
10918f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r46, %r45, 16, 1;
10928f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r47, %r46, %r45;
10938f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r48, %r47, 32767;
10948f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p7, %f15, %f15;
10958f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r49, %r45, 4194304;
10968f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r50, %r49, %r48, %p7;
1097310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r51, %rs9;
10988f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r52, %r51, 16;
10998f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f16, %r52;
11008f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r53, %r35, -65536;
11018f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f17, %r53;
11028f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.f32 %f18, %f16, %f17;
11038f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r54, %f18;
11048f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r55, %r54, 16, 1;
11058f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r56, %r55, %r54;
11068f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r57, %r56, 32767;
11078f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p8, %f18, %f18;
11088f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r58, %r54, 4194304;
11098f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r59, %r58, %r57, %p8;
1110a1f5fe8cSFraser Cormack; CHECK-SM70-NEXT:    prmt.b32 %r60, %r59, %r50, 0x7632U;
11118f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r60;
11128f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
; %1/%2: the multiply-add that could be contracted into a single fma.
11138f8016feSHugh Delaney  %1 = fmul <2 x bfloat> %a, %b
11148f8016feSHugh Delaney  %2 = fadd <2 x bfloat> %1, %c
; %3/%4: first use of %2 -- select %2 where it is ordered-greater-than 0.0,
; otherwise 0.0 (the expanded relu form).
11158f8016feSHugh Delaney  %3 = fcmp ogt <2 x bfloat> %2, <bfloat 0.0, bfloat 0.0>
11168f8016feSHugh Delaney  %4 = select <2 x i1> %3, <2 x bfloat> %2, <2 x bfloat> <bfloat 0.0, bfloat 0.0>
; %5: second, independent use of %2; this is what makes the fma multi-use.
11178f8016feSHugh Delaney  %5 = fadd <2 x bfloat> %2, <bfloat 7.0, bfloat 7.0>
; %6: combine both users so neither is dead.
11188f8016feSHugh Delaney  %6 = fadd <2 x bfloat> %4, %5
11198f8016feSHugh Delaney  ret <2 x bfloat> %6
11208f8016feSHugh Delaney}
11218f8016feSHugh Delaney
; Relu written as maxnum(a*b + c, 0.0) on <2 x bfloat>, with the fmul/fadd
; result %2 used only by the maxnum call.
;
; What the generated assertions below pin down, per RUN configuration:
;  * sm_80 (default) and sm_80 with FTZ: a single fma.rn.relu.bf16x2 and
;    nothing else between the parameter loads and the store of the result.
;  * sm_70: no relu/bf16x2 instruction; each lane is computed as fma.rn.f32
;    followed by max.f32 against 0f00000000, with an integer
;    bfe/add/setp.nan/or/selp sequence after every f32 result and a final
;    prmt.b32 to combine the two lanes.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing them by hand.
11228f8016feSHugh Delaneydefine <2 x bfloat> @fma_bf16x2_expanded_maxnum_no_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) #0 {
11238f8016feSHugh Delaney; CHECK-LABEL: fma_bf16x2_expanded_maxnum_no_nans(
11248f8016feSHugh Delaney; CHECK:       {
11258f8016feSHugh Delaney; CHECK-NEXT:    .reg .b32 %r<5>;
11268f8016feSHugh Delaney; CHECK-EMPTY:
11278f8016feSHugh Delaney; CHECK-NEXT:  // %bb.0:
11288f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_maxnum_no_nans_param_2];
11298f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_maxnum_no_nans_param_1];
11308f8016feSHugh Delaney; CHECK-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_maxnum_no_nans_param_0];
11318f8016feSHugh Delaney; CHECK-NEXT:    fma.rn.relu.bf16x2 %r4, %r3, %r2, %r1;
11328f8016feSHugh Delaney; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
11338f8016feSHugh Delaney; CHECK-NEXT:    ret;
11348f8016feSHugh Delaney;
11358f8016feSHugh Delaney; CHECK-FTZ-LABEL: fma_bf16x2_expanded_maxnum_no_nans(
11368f8016feSHugh Delaney; CHECK-FTZ:       {
11378f8016feSHugh Delaney; CHECK-FTZ-NEXT:    .reg .b32 %r<5>;
11388f8016feSHugh Delaney; CHECK-FTZ-EMPTY:
11398f8016feSHugh Delaney; CHECK-FTZ-NEXT:  // %bb.0:
11408f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_maxnum_no_nans_param_2];
11418f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_maxnum_no_nans_param_1];
11428f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_maxnum_no_nans_param_0];
11438f8016feSHugh Delaney; CHECK-FTZ-NEXT:    fma.rn.relu.bf16x2 %r4, %r3, %r2, %r1;
11448f8016feSHugh Delaney; CHECK-FTZ-NEXT:    st.param.b32 [func_retval0], %r4;
11458f8016feSHugh Delaney; CHECK-FTZ-NEXT:    ret;
11468f8016feSHugh Delaney;
11478f8016feSHugh Delaney; CHECK-SM70-LABEL: fma_bf16x2_expanded_maxnum_no_nans(
11488f8016feSHugh Delaney; CHECK-SM70:       {
11498f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .pred %p<5>;
1150310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b16 %rs<7>;
1151310e7987SAlex MacLean; CHECK-SM70-NEXT:    .reg .b32 %r<43>;
11528f8016feSHugh Delaney; CHECK-SM70-NEXT:    .reg .f32 %f<13>;
11538f8016feSHugh Delaney; CHECK-SM70-EMPTY:
11548f8016feSHugh Delaney; CHECK-SM70-NEXT:  // %bb.0:
11558f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r1, [fma_bf16x2_expanded_maxnum_no_nans_param_0];
11568f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r2, [fma_bf16x2_expanded_maxnum_no_nans_param_1];
11578f8016feSHugh Delaney; CHECK-SM70-NEXT:    ld.param.b32 %r3, [fma_bf16x2_expanded_maxnum_no_nans_param_2];
11588f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r3;
11598f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r4, %rs1;
11608f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r5, %r4, 16;
11618f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f1, %r5;
1162310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs3, %rs4}, %r2;
1163310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r6, %rs3;
11648f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r7, %r6, 16;
11658f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f2, %r7;
1166310e7987SAlex MacLean; CHECK-SM70-NEXT:    mov.b32 {%rs5, %rs6}, %r1;
1167310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r8, %rs5;
11688f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r9, %r8, 16;
11698f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f3, %r9;
11708f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
11718f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r10, %f4;
11728f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r11, %r10, 16, 1;
11738f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r12, %r11, %r10;
11748f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r13, %r12, 32767;
11758f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p1, %f4, %f4;
11768f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r14, %r10, 4194304;
11778f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r15, %r14, %r13, %p1;
11788f8016feSHugh Delaney; CHECK-SM70-NEXT:    cvt.u32.u16 %r16, %rs2;
11798f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r17, %r16, 16;
11808f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f5, %r17;
1181310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r18, %rs4;
11828f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r19, %r18, 16;
11838f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f6, %r19;
1184310e7987SAlex MacLean; CHECK-SM70-NEXT:    cvt.u32.u16 %r20, %rs6;
11858f8016feSHugh Delaney; CHECK-SM70-NEXT:    shl.b32 %r21, %r20, 16;
11868f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f7, %r21;
11878f8016feSHugh Delaney; CHECK-SM70-NEXT:    fma.rn.f32 %f8, %f7, %f6, %f5;
11888f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r22, %f8;
11898f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r23, %r22, 16, 1;
11908f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r24, %r23, %r22;
11918f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r25, %r24, 32767;
11928f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p2, %f8, %f8;
11938f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r26, %r22, 4194304;
11948f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r27, %r26, %r25, %p2;
11958f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r28, %r27, -65536;
11968f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f9, %r28;
11978f8016feSHugh Delaney; CHECK-SM70-NEXT:    max.f32 %f10, %f9, 0f00000000;
11988f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r29, %f10;
11998f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r30, %r29, 16, 1;
12008f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r31, %r30, %r29;
12018f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r32, %r31, 32767;
12028f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p3, %f10, %f10;
12038f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r33, %r29, 4194304;
12048f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r34, %r33, %r32, %p3;
12058f8016feSHugh Delaney; CHECK-SM70-NEXT:    and.b32 %r35, %r15, -65536;
12068f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %f11, %r35;
12078f8016feSHugh Delaney; CHECK-SM70-NEXT:    max.f32 %f12, %f11, 0f00000000;
12088f8016feSHugh Delaney; CHECK-SM70-NEXT:    mov.b32 %r36, %f12;
12098f8016feSHugh Delaney; CHECK-SM70-NEXT:    bfe.u32 %r37, %r36, 16, 1;
12108f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r38, %r37, %r36;
12118f8016feSHugh Delaney; CHECK-SM70-NEXT:    add.s32 %r39, %r38, 32767;
12128f8016feSHugh Delaney; CHECK-SM70-NEXT:    setp.nan.f32 %p4, %f12, %f12;
12138f8016feSHugh Delaney; CHECK-SM70-NEXT:    or.b32 %r40, %r36, 4194304;
12148f8016feSHugh Delaney; CHECK-SM70-NEXT:    selp.b32 %r41, %r40, %r39, %p4;
1215a1f5fe8cSFraser Cormack; CHECK-SM70-NEXT:    prmt.b32 %r42, %r41, %r34, 0x7632U;
12168f8016feSHugh Delaney; CHECK-SM70-NEXT:    st.param.b32 [func_retval0], %r42;
12178f8016feSHugh Delaney; CHECK-SM70-NEXT:    ret;
; %1/%2: the multiply-add that can be contracted into a single fma.
12188f8016feSHugh Delaney  %1 = fmul <2 x bfloat> %a, %b
12198f8016feSHugh Delaney  %2 = fadd <2 x bfloat> %1, %c
; %3: the only use of %2 -- maxnum against a zero vector.
; NOTE(review): the intrinsic suffix here is `bf16x2`, whereas overloaded
; intrinsics on <2 x bfloat> are normally spelled with `v2bf16`; presumably
; the IR reader accepts/remangles this spelling -- confirm before renaming.
12208f8016feSHugh Delaney  %3 = call <2 x bfloat> @llvm.maxnum.bf16x2(<2 x bfloat> %2, <2 x bfloat> <bfloat 0.0, bfloat 0.0>)
12218f8016feSHugh Delaney  ret <2 x bfloat> %3
12228f8016feSHugh Delaney}
12238f8016feSHugh Delaney
; Attribute groups referenced as #N by the function definitions in this file.
; #0 sets the no-NaNs, no-signed-zeros and unsafe-fp-math string attributes to
; "true"; both bf16x2 functions directly above are declared with #0.
12248f8016feSHugh Delaneyattributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "unsafe-fp-math"="true" }
; #1 sets only "unsafe-fp-math"; it makes no claim about NaNs or signed zeros.
12258f8016feSHugh Delaneyattributes #1 = { "unsafe-fp-math"="true" }
1226