# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150

---

# VOP3 consumers of a DPP mov. The last V_MED3_F32 carries an immediate in
# src1, so its expectation is split between the GFX1100 and GFX1150 prefixes
# (the gfx1200 invocation reuses the GFX1150 prefix).
# GCN-LABEL: name: vop3
# GCN: %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64_dpp %3, %0, %1, %5, 1, 1, 15, 15, 1, implicit $exec
# GCN: %8:vgpr_32 = V_CVT_PK_U8_F32_e64_dpp %3, 4, %0, 2, %2, 2, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %3, 0, %1, 0, 2, 0, %7, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
name: vop3
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = COPY $vgpr2
    %3:vgpr_32 = IMPLICIT_DEF
    %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec

    %5:sreg_32_xm0_xexec = IMPLICIT_DEF
    %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64 %4, %1, %5, 1, implicit $exec

    %8:vgpr_32 = V_CVT_PK_U8_F32_e64 4, %4, 2, %2, 2, %1, 1, implicit $mode, implicit $exec

    ; should not be combined because src2 literal is illegal
    %9:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec

    ; should not be combined on subtargets where src1 imm is illegal
    %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
...
---

# V_MED3_F32 consumers with an SGPR or a constant in src1/src2 next to the DPP
# source. Expectations that depend on the subtarget use the GFX1100 / GFX1150
# prefixes; the rest use the common prefix.
# GCN-LABEL: name: vop3_sgpr_src1
# GCN: %6:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %1, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GFX1100: %8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec
# GFX1150: %8:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %1, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GFX1100: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec
# GFX1150: %10:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %3, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec
# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, 42, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec
name: vop3_sgpr_src1
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:sgpr_32 = COPY $sgpr0
    %3:sgpr_32 = COPY $sgpr1
    %4:vgpr_32 = IMPLICIT_DEF

    ; should be combined because src2 allows sgpr
    %5:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_MED3_F32_e64 0, %5, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec

    ; should be combined only on subtargets that allow sgpr for src1
    %7:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
    %8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec

    ; should be combined only on subtargets that allow sgpr for src1
    %9:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
    %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec

    ; should be combined only on subtargets that allow inlinable constants for src1
    %11:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
    %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec

    ; should not be combined when literal constants are used
    %13:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
    %14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec
...
---

# Regression test for src_modifiers on base u16 opcode
# GCN-LABEL: name: vop3_u16
# GCN: %5:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp %3, 0, %1, 0, %3, 0, 0, 1, 15, 15, 1, implicit $exec
# GCN: %7:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp %3, 1, %5, 2, %5, 0, 0, 1, 15, 15, 1, implicit $exec
# GCN: %9:vgpr_32 = V_ADD_NC_U16_fake16_e64 4, %8, 8, %7, 0, 0, implicit $exec
name: vop3_u16
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = COPY $vgpr2
    %3:vgpr_32 = IMPLICIT_DEF
    %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, %4, 0, %3, 0, 0, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %3, %5, 1, 15, 15, 1, implicit $exec
    %7:vgpr_32 = V_ADD_NC_U16_fake16_e64 1, %6, 2, %5, 0, 0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_dpp %3, %7, 1, 15, 15, 1, implicit $exec
    %9:vgpr_32 = V_ADD_NC_U16_fake16_e64 4, %8, 8, %7, 0, 0, implicit $exec
...

# VOP3P-style consumers (V_DOT2_F32_F16, V_FMA_MIX*) of a DPP mov. The check
# lines for this function live inside the body.
name: vop3p
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2

    ; GCN-LABEL: name: vop3p
    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
    ; GCN: [[V_DOT2_F32_F16_:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp]], 0, [[COPY]], 0, [[COPY2]], 0, 5, 0, 0, 0, implicit $mode, implicit $exec
    ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
    ; GCN: [[V_DOT2_F32_F16_1:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, 4, 0, 0, implicit $mode, implicit $exec
    ; GCN: [[V_DOT2_F32_F16_dpp:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16_dpp [[DEF]], 10, [[COPY1]], 8, [[COPY]], 9, [[COPY2]], 1, 0, 7, 4, 5, 1, 15, 15, 1, implicit $mode, implicit $exec
    ; GCN: [[V_FMA_MIX_F32_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIX_F32_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
    ; GCN: [[V_FMA_MIXLO_F16_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIXLO_F16_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 0, [[COPY2]], 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
    ; GCN: [[V_FMA_MIXHI_F16_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIXHI_F16_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, [[COPY]], 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = COPY $vgpr2
    %3:vgpr_32 = IMPLICIT_DEF

    ; this should not be combined because op_sel is not zero
    %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_DOT2_F32_F16 0, %4, 0, %0, 0, %2, 0, 5, 0, 0, 0, implicit $mode, implicit $exec

    ; this should not be combined because op_sel_hi is not all set
    %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %7:vgpr_32 = V_DOT2_F32_F16 0, %6, 0, %0, 0, %2, 0, 0, 4, 0, 0, implicit $mode, implicit $exec

    ; the remaining four cases are expected to be combined into *_dpp forms
    %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %9:vgpr_32 = V_DOT2_F32_F16 10, %8, 8, %0, 9, %2, 1, 0, 7, 4, 5, implicit $mode, implicit $exec

    %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %11:vgpr_32 = V_FMA_MIX_F32 8, %10, 8, %0, 8, %2, 1, 0, 7, implicit $mode, implicit $exec

    %12:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %13:vgpr_32 = V_FMA_MIXLO_F16 8, %12, 8, %0, 8, %2, 0, %2, 0, 7, implicit $mode, implicit $exec

    %14:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %15:vgpr_32 = V_FMA_MIXHI_F16 8, %14, 8, %0, 8, %2, 1, %0, 0, 7, implicit $mode, implicit $exec

...

# V_FMAC_F32_e64 fed by a DPP mov is expected to become V_FMAC_F32_e64_dpp.
# GCN-LABEL: name: fmac_e64
# GCN: %5:vgpr_32 = V_FMAC_F32_e64_dpp %3, 2, %0, 2, %1, 2, %2, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
name: fmac_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = COPY $vgpr2
    %3:vgpr_32 = IMPLICIT_DEF
    %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_FMAC_F32_e64 2, %4, 2, %1, 2, %2, 1, 2, implicit $mode, implicit $exec
...

# when the DPP source isn't a src0 operand the operation should be commuted if possible
# GCN-LABEL: name: dpp_commute_shrink
# GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
# GCN: %16:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
name: dpp_commute_shrink
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    ; NOTE(review): each old value below (1, -1, INT_MIN, INT_MAX, 0) looks
    ; like the identity of the consuming operation - confirm against the pass.
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 0, implicit $exec

    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
    %7:vgpr_32 = V_AND_B32_e64 %1, %6, implicit $exec

    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e64 %1, %9, implicit $exec

    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e64 %1, %12, implicit $exec

    %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
    %16:vgpr_32 = V_SUB_U32_e64 %1, %15, 0, implicit $exec

...

# do not combine, dpp arg used twice
# GCN-LABEL: name: dpp_arg_twice
# GCN: %4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec
# GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec
# GCN: %8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec
name: dpp_arg_twice
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; DPP mov result appears as src1 and src2
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec

    ; DPP mov result appears as src0 and src2
    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec

    ; DPP mov result appears as src0 and src1
    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec

...

# when the dpp source isn't a src0 operand the operation should be commuted if possible
# GCN-LABEL: name: dpp_commute_e64
# GCN: %4:vgpr_32 = V_MUL_U32_U24_e64_dpp %1, %0, %1, 1, 1, 14, 15, 0, implicit $exec
# GCN: %7:vgpr_32 = V_FMA_F32_e64_dpp %5, 2, %0, 1, %1, 2, %1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %10:vgpr_32 = V_SUBREV_U32_e64_dpp %1, %0, %1, 1, 1, 14, 15, 0, implicit $exec
# GCN: %13:vgpr_32, %14:sreg_32_xm0_xexec = V_ADD_CO_U32_e64_dpp %1, %0, %1, 0, 1, 14, 15, 0, implicit $exec
# GCN: %17:vgpr_32, %18:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 5, %16, 0, implicit $exec
name: dpp_commute_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 1, implicit $exec

    %5:vgpr_32 = IMPLICIT_DEF
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 1, implicit $exec
    %7:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %6, 2, %1, 1, 2, implicit $mode, implicit $exec

    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_SUB_U32_e64 %1, %9, 1, implicit $exec

    %11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 14, 15, 0, implicit $exec
    %13:vgpr_32, %14:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1, %12, 0, implicit $exec

    ; this cannot be combined because immediate as src0 isn't commutable
    %15:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %16:vgpr_32 = V_MOV_B32_dpp %15, %0, 1, 14, 15, 0, implicit $exec
    %17:vgpr_32, %18:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 5, %16, 0, implicit $exec
...

---

# check for floating point modifiers
# GCN-LABEL: name: add_f32_e64
# GCN: %4:vgpr_32 = V_ADD_F32_e64_dpp %2, 0, %1, 0, %0, 0, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %10:vgpr_32 = V_ADD_F32_e64_dpp %2, 4, %1, 8, %0, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec

name: add_f32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; this should be combined as e64
    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec

    ; this should be combined and shrunk as all modifiers are default
    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec

    ; this should be combined and shrunk as modifiers other than abs|neg are default
    %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec

    ; this should be combined as e64
    %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
...

# check for e64 modifiers
# GCN-LABEL: name: add_u32_e64
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_U32_e64_dpp %2, %0, %1, 1, 1, 15, 15, 1, implicit $exec

name: add_u32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; this should be combined and shrunk as all modifiers are default
    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec

    ; this should be combined as _e64
    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
...

# tests on sequences of dpp consumers
# GCN-LABEL: name: dpp_seq
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %5:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# broken sequence:
# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec

name: dpp_seq
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

    ; all three users of %3 are expected to be combined
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
    %5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec
    %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec

    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
    ; this breaks the sequence
    %9:vgpr_32 = V_SUB_U32_e32 5, %7, implicit $exec
...

# tests on sequences of dpp consumers followed by control flow
# GCN-LABEL: name: dpp_seq_cf
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %5:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec

name: dpp_seq_cf
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
    %5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec
    %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec

    %7:sreg_32 = V_CMP_EQ_U32_e64 %5, %6, implicit $exec
    %8:sreg_32 = SI_IF %7, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2

  bb.2:
    SI_END_CF %8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
...

# old register (%2) is defined in a different block than the DPP mov; the
# combine is still expected.
# GCN-LABEL: name: old_in_diff_bb
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %0, %1, %0, 1, 1, 1, 0, implicit $exec

name: old_in_diff_bb
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
...

# old reg def is in diff BB but bound_ctrl:1 - can combine
# GCN-LABEL: name: old_in_diff_bb_bctrl_zero
# GCN: %4:vgpr_32 = V_ADD_U32_dpp {{%[0-9]}}, %0, %1, 1, 15, 15, 1, implicit $exec

name: old_in_diff_bb_bctrl_zero
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...

# EXEC mask changed between def and use - cannot combine
# GCN-LABEL: name: exec_changed
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec

name: exec_changed
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
    ; the COPY below carries an implicit-def of $exec between the two users of %3
    %5:sreg_64 = COPY $exec, implicit-def $exec
    %6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...

# test if $old definition is correctly tracked through subreg manipulation pseudos

# GCN-LABEL: name: mul_old_subreg
# GCN: %7:vgpr_32 = V_MUL_I32_I24_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec

name: mul_old_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
    %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
    %7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
...

# old operand of the DPP mov is a subregister written by INSERT_SUBREG.
# GCN-LABEL: name: add_old_subreg
# GCN: %5:vgpr_32 = V_ADD_U32_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec

name: add_old_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
    %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...

# old operand of the DPP mov is a subregister that the REG_SEQUENCE does not define.
# GCN-LABEL: name: add_old_subreg_undef
# GCN: %5:vgpr_32 = V_ADD_U32_dpp undef %3.sub1, %1, %0.sub1, 1, 15, 15, 1, implicit $exec

name: add_old_subreg_undef
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...

# Test instruction which does not have modifiers in VOP1 form but does in DPP form.
# GCN-LABEL: name: dpp_vop1
# GCN: %3:vgpr_32 = V_CEIL_F32_dpp %0, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_vop1
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...

# Test instruction which does not have modifiers in VOP2 form but does in DPP form.
# GCN-LABEL: name: dpp_min
# GCN: %3:vgpr_32 = V_MIN_F32_dpp %0, 0, undef %2:vgpr_32, 0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_min
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
...

# Test an undef old operand
# GCN-LABEL: name: dpp_undef_old
# GCN: %3:vgpr_32 = V_CEIL_F32_dpp undef %1:vgpr_32, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_undef_old
tracksRegLiveness: true
body: |
  bb.0:
    %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...

# Do not combine a dpp mov which writes a physreg.
# GCN-LABEL: name: phys_dpp_mov_dst
# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
name: phys_dpp_mov_dst
tracksRegLiveness: true
body: |
  bb.0:
    $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
...

# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_old_src
# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_old_src
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...

# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_src
# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_src
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...

# Two DPP movs joined by a REG_SEQUENCE; both users of the sequence are
# expected to be combined.
# GCN-LABEL: name: dpp_reg_sequence_both_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %9:vgpr_32 = IMPLICIT_DEF
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %6:vgpr_32 = V_ADD_U32_dpp %9, %1.sub0, %2, 1, 15, 15, 1, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_both_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...

# Only the sub0 DPP mov is expected to be combined; the sub1 mov (trailing
# operands 1, 1, 1, 1) is expected to remain.
# GCN-LABEL: name: dpp_reg_sequence_first_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
name: dpp_reg_sequence_first_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...

# Only the sub1 DPP mov is expected to be combined; the sub0 mov (trailing
# operands 1, 1, 1, 1) is expected to remain.
# GCN-LABEL: name: dpp_reg_sequence_second_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_second_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...

# Neither DPP mov (both with trailing operands 1, 1, 1, 1) is expected to be
# combined.
# GCN-LABEL: name: dpp_reg_sequence_none_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
name: dpp_reg_sequence_none_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...

# The users of the REG_SEQUENCE sit in a different block than the DPP movs;
# nothing is expected to be combined.
# GCN-LABEL: name: dpp_reg_sequence_exec_changed
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: S_BRANCH %bb.1
# GCN: bb.1:
# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
name: dpp_reg_sequence_exec_changed
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    S_BRANCH %bb.1

  bb.1:
    liveins: $vcc_lo
    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...

# The users read a second REG_SEQUENCE built from subregisters of the first;
# nothing is expected to be combined.
# GCN-LABEL: name: dpp_reg_sequence_subreg
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
# GCN: %7:vgpr_32 = V_ADD_U32_e32 %6.sub0, %2, implicit $exec
# GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
name: dpp_reg_sequence_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
    %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %8, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %8, implicit-def $vcc, implicit $vcc, implicit $exec
...
# Two V_FMA_F32_e64 users read halves of a REG_SEQUENCE of dpp movs, and each
# one reads a half in the src2 position as well as src0. The checks below
# expect both movs, the REG_SEQUENCE and both FMAs to be left untouched.
# GCN-LABEL: name: dpp_reg_sequence_src2_reject
#GCN: %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
#GCN: %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
#GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
#GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub0, 1, 2, implicit $mode, implicit $exec
#GCN: %7:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
name: dpp_reg_sequence_src2_reject
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    ; use of dpp arg as src2, reject
    ; (%4.sub0 appears both as src0 and as src2 of this FMA)
    %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub0, 1, 2, implicit $mode, implicit $exec
    ; cannot commute src0 and src2, and %4.sub0 already rejected, reject
    %7:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
...
# A single V_FMA_F32_e64 reads %4.sub0 as src0 and %4.sub1 as src2. The checks
# below expect the sub0 mov to be folded into V_FMA_F32_e64_dpp (src0 becomes
# %1.sub0 and the sub0 input of the REG_SEQUENCE becomes undef), while the
# sub1 mov stays because src2 still reads %4.sub1.
# GCN-LABEL: name: dpp_reg_sequence_src2
#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
#GCN: %4:vreg_64 = REG_SEQUENCE undef %2:vgpr_32, %subreg.sub0, %3, %subreg.sub1
#GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
#GCN: %6:vgpr_32 = V_FMA_F32_e64_dpp %8, 2, %1.sub0, 2, %5, 2, %4.sub1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_reg_sequence_src2
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    ; src0 reads the sub0 half, src2 reads the sub1 half
    %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
...

# A 64-bit V_MOV_B64_DPP_PSEUDO whose operands are both IMPLICIT_DEF feeds a
# 32-bit add / add-with-carry pair through its sub0 and sub1 halves. The
# checks below expect both users to become *_dpp instructions that read the
# matching subregisters of %1 and %0 directly.
# GCN-LABEL: name: dpp64_add64_impdef
# GCN: %3:vgpr_32 = V_ADD_U32_dpp %1.sub0, %0.sub0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %5:vgpr_32 = V_ADDC_U32_dpp %1.sub1, %0.sub1, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp64_add64_impdef
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vcc_lo
    %0:vreg_64 = IMPLICIT_DEF
    %1:vreg_64 = IMPLICIT_DEF
    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO %1:vreg_64, %0:vreg_64, 1, 15, 15, 1, implicit $exec
    ; low half goes to the add, high half to the add-with-carry
    %5:vgpr_32 = V_ADD_U32_e32 %2.sub0, undef %4:vgpr_32, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
...
# Same add / add-with-carry shape as dpp64_add64_impdef, but both operands of
# the V_MOV_B64_DPP_PSEUDO are undef uses with no defining instruction. The
# checks below expect the combined *_dpp users to keep the undef flag on the
# subregister operands.
# GCN-LABEL: name: dpp64_add64_undef
# GCN: %3:vgpr_32 = V_ADD_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %5:vgpr_32 = V_ADDC_U32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp64_add64_undef
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vcc_lo
    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_U32_e32 %2.sub0, undef %4:vgpr_32, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
...


# Two V_CNDMASK_B32_e64 users, each with its own dpp mov as src0 and an SGPR
# condition as the last source operand. The checks below expect the first
# select to stay as V_CNDMASK_B32_e64 and the second one (src0 modifier 4) to
# become V_CNDMASK_B32_e64_dpp.
# GCN-LABEL: name: cndmask_with_src2
# GCN: %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
# GCN: %8:vgpr_32 = V_CNDMASK_B32_e64_dpp %2, 4, %1, 0, %1, %7, 1, 15, 15, 1, implicit $exec
name: cndmask_with_src2
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; first case: all source modifiers are 0
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:sreg_32_xm0_xexec = IMPLICIT_DEF
    %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec

    ; src2 is legal for _e64
    %6:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 15, 1, implicit $exec
    %7:sreg_32_xm0_xexec = IMPLICIT_DEF
    %8:vgpr_32 = V_CNDMASK_B32_e64 4, %6, 0, %1, %7, implicit $exec
...
---

# Make sure flags aren't dropped
# The input add carries the nofpexcept and nnan MI flags; the check below
# expects the combined V_ADD_F32_dpp to still carry both of them.
# GCN-LABEL: name: flags_add_f32_e64
# GCN: %4:vgpr_32 = nnan nofpexcept V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
name: flags_add_f32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
    ; keep %4 alive through the end of the program
    S_ENDPGM 0, implicit %4

...

# The dpp mov result %2 is used as both src0 and src1 of the same
# V_MAX_F32_e64. The check below expects that instruction to stay uncombined.
# GCN-LABEL: name: dont_combine_more_than_one_operand
# GCN: %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
name: dont_combine_more_than_one_operand
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
...

# REG_SEQUENCE variant of the previous test: each add reads the same half of
# the REG_SEQUENCE of dpp movs in both of its source operands. The checks
# below expect both adds to stay uncombined.
# GCN-LABEL: name: dont_combine_more_than_one_operand_dpp_reg_sequence
# GCN: %5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
# GCN: %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
name: dont_combine_more_than_one_operand_dpp_reg_sequence
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
...
# Check op_sel is all 0s when combining
# Five V_ADD_I16_fake16_e64 users, each fed by its own dpp mov, that differ
# only in the source-modifier immediates. Going by the per-case comments in
# the body, 4 in a source-modifier slot is that source's op_sel bit and 8 in
# the src0 slot is the destination op_sel bit. The checks below expect only
# the all-zero case to become V_ADD_I16_fake16_e64_dpp.
# GCN-LABEL: name: opsel_vop3
# GCN: %4:vgpr_32 = V_ADD_I16_fake16_e64_dpp %2, 0, %0, 0, %1, 0, 0, 1, 15, 15, 1, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_I16_fake16_e64 4, %5, 0, %1, 0, 0, implicit $exec
# GCN: %8:vgpr_32 = V_ADD_I16_fake16_e64 0, %7, 4, %1, 0, 0, implicit $exec
# GCN: %10:vgpr_32 = V_ADD_I16_fake16_e64 4, %9, 4, %1, 0, 0, implicit $exec
# GCN: %12:vgpr_32 = V_ADD_I16_fake16_e64 8, %11, 0, %1, 0, 0, implicit $exec
name: opsel_vop3
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; Combine for op_sel:[0,0,0]
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_I16_fake16_e64 0, %3, 0, %1, 0, 0, implicit $exec

    ; Do not combine for op_sel:[1,0,0]
    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_ADD_I16_fake16_e64 4, %5, 0, %1, 0, 0, implicit $exec

    ; Do not combine for op_sel:[0,1,0]
    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %8:vgpr_32 = V_ADD_I16_fake16_e64 0, %7, 4, %1, 0, 0, implicit $exec

    ; Do not combine for op_sel:[1,1,0]
    %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %10:vgpr_32 = V_ADD_I16_fake16_e64 4, %9, 4, %1, 0, 0, implicit $exec

    ; Do not combine for op_sel:[0,0,1] (dst_op_sel only)
    %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %12:vgpr_32 = V_ADD_I16_fake16_e64 8, %11, 0, %1, 0, 0, implicit $exec
...
# Check op_sel is all 0s and op_sel_hi is all 1s when combining
# Four V_FMA_MIX_F32 users, each fed by its own dpp mov, with the same
# modifier value on all three sources. Going by the per-case comments in the
# body, 0 sets neither bit, 4 sets op_sel, 8 sets op_sel_hi and 12 sets both.
# The checks below expect only the 8 / 8 / 8 case to become V_FMA_MIX_F32_dpp.
# NOTE(review): the 7 ahead of the dpp control operands in that check is
# presumably the packed op_sel_hi:[1,1,1] - confirm against the pass.
# GCN-LABEL: name: opsel_vop3p
# GCN: %5:vgpr_32 = V_FMA_MIX_F32 0, %4, 0, %1, 0, %2, 0, 0, 0, implicit $mode, implicit $exec
# GCN: %7:vgpr_32 = V_FMA_MIX_F32 4, %6, 4, %1, 4, %2, 0, 0, 0, implicit $mode, implicit $exec
# GCN: %9:vgpr_32 = V_FMA_MIX_F32_dpp %3, 8, %0, 8, %1, 8, %2, 0, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %11:vgpr_32 = V_FMA_MIX_F32 12, %10, 12, %1, 12, %2, 0, 0, 0, implicit $mode, implicit $exec

name: opsel_vop3p
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = COPY $vgpr2
    %3:vgpr_32 = IMPLICIT_DEF

    ; Do not combine for op_sel:[0,0,0] op_sel_hi:[0,0,0]
    %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_FMA_MIX_F32 0, %4, 0, %1, 0, %2, 0, 0, 0, implicit $mode, implicit $exec

    ; Do not combine for op_sel:[1,1,1] op_sel_hi:[0,0,0]
    %6:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
    %7:vgpr_32 = V_FMA_MIX_F32 4, %6, 4, %1, 4, %2, 0, 0, 0, implicit $mode, implicit $exec

    ; Combine for op_sel:[0,0,0] op_sel_hi:[1,1,1]
    %8:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
    %9:vgpr_32 = V_FMA_MIX_F32 8, %8, 8, %1, 8, %2, 0, 0, 0, implicit $mode, implicit $exec

    ; Do not combine for op_sel:[1,1,1] op_sel_hi:[1,1,1]
    %10:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
    %11:vgpr_32 = V_FMA_MIX_F32 12, %10, 12, %1, 12, %2, 0, 0, 0, implicit $mode, implicit $exec
...