# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
#
# Exercises the gcn-dpp-combine pass selected by the two llc invocations above.
# Each function feeds a DPP mov into one or more VALU ops; the check lines
# state whether the pair is expected to be folded into a single *_dpp
# instruction or left untouched.
# NOTE(review): the four trailing immediates of the DPP instructions are
# presumably dpp_ctrl, row_mask, bank_mask and bound_ctrl, in that order -
# confirm against the instruction definitions.

---
# old is undefined: only combine when masks are fully enabled and
# bound_ctrl:1 is set, otherwise the result of DPP VALU op can be undefined.
# GCN-LABEL: name: old_is_undef
# GCN: %2:vgpr_32 = IMPLICIT_DEF
# VOP2:
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
# GCN: %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
# GCN: %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
# VOP1:
# GCN: %12:vgpr_32 = V_NOT_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
# GCN: %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
# GCN: %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
# GCN: %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
name: old_is_undef
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; VOP2
    ; masks 15/15, last operand 1: the only VOP2 pair expected to be combined
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec

    ; masks 15/15, last operand 0: expected to stay uncombined
    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec

    ; masks 14/15, last operand 1: expected to stay uncombined
    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec

    ; masks 14/15, last operand 0: expected to stay uncombined
    %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec

    ; VOP1
    ; same four operand combinations as above, consumed by a unary op
    %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec

    %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec

    %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec

    %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
...

# old is zero cases:

# GCN-LABEL: name: old_is_0

# VOP2:
# case 1: old is zero, masks are fully enabled, bound_ctrl:1 is on:
# the DPP mov result would be either zero ({src lane disabled}|{src lane is
# out of range}) or active src lane result - can combine with old = undef.
# undef is preferred as it makes life easier for the regalloc.
# GCN: [[U1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
# GCN: %4:vgpr_32 = V_ADD_U32_dpp [[U1]], %0, %1, 1, 15, 15, 1, implicit $exec

# case 2: old is zero, masks are fully enabled, bound_ctrl:1 is off:
# as the DPP mov old is zero this case is no different from case 1 - combine it
# setting bound_ctrl:1 on for the combined DPP VALU op to make old undefined
# GCN: [[U2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
# GCN: %6:vgpr_32 = V_ADD_U32_dpp [[U2]], %0, %1, 1, 15, 15, 1, implicit $exec

# case 3: masks are partially disabled, bound_ctrl:1 is on:
# the DPP mov result would be either zero ({src lane disabled}|{src lane is
# out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or
# active src lane result - can combine with old = src1 of the VALU op.
# The VALU op should have the same masks as DPP mov as they select lanes
# with identity value.
# Special case: the bound_ctrl for the combined DPP VALU op isn't important
# here but let's make it off to keep the combiner's logic simpler.
# GCN: %8:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec

# case 4: masks are partially disabled, bound_ctrl:1 is off:
# the DPP mov result would be either zero ({src lane disabled}|{src lane is
# out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or
# active src lane result - can combine with old = src1 of the VALU op.
# The VALU op should have the same masks as DPP mov as they select
# lanes with identity value
# GCN: %10:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec

# VOP1:
# see case 1
# GCN: [[U3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
# GCN: %12:vgpr_32 = V_NOT_B32_dpp [[U3]], %0, 1, 15, 15, 1, implicit $exec
# see case 2
# GCN: [[U4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
# GCN: %14:vgpr_32 = V_NOT_B32_dpp [[U4]], %0, 1, 15, 15, 1, implicit $exec
# cases 3 and 4 are not applicable as there is no way to specify an unchanged
# result for the unary VALU op
# GCN: %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
# GCN: %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec

name: old_is_0
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    ; the old value of every DPP mov below is this constant zero
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

    ; VOP2
    ; case 1
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec

    ; case 2
    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec

    ; case 3
    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec

    ; case 4
    %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec

    ; VOP1
    ; same four operand combinations, consumed by a unary op
    %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec

    %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec

    %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec

    %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
...

# old is nonzero identity cases:

# old is nonzero identity, masks are fully enabled, bound_ctrl:1 is off:
# the DPP mov result would be either identity ({src lane disabled}|{out of
# range}) or src lane result - can combine with old = src1 of the VALU op
# The DPP VALU op should have the same masks (and bctrl) as DPP mov as they
# select lanes with identity value

# GCN-LABEL: name: nonzero_old_is_identity_masks_enabled_bctl_off
# GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec

name: nonzero_old_is_identity_masks_enabled_bctl_off
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    ; each group below uses the identity constant of its VALU op as old
    ; old = 1 for the multiply
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec

    ; old = 4294967295 (all bits set) for the bitwise and
    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 0, implicit $exec
    %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec

    ; old = -2147483648 (smallest i32) for the signed max
    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 15, 15, 0, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec

    ; old = 2147483647 (largest i32) for the signed min
    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 15, 0, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...

# old is nonzero identity, masks are partially enabled, bound_ctrl:1 is off:
# the DPP mov result would be either identity ({src lane disabled}|{src lane is
# out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or
# active src lane result - can combine with old = src1 of the VALU op.
# The DPP VALU op should have the same masks (and bctrl) as DPP mov as they
# select lanes with identity value

# GCN-LABEL: name: nonzero_old_is_identity_masks_partially_disabled_bctl_off
# GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec

name: nonzero_old_is_identity_masks_partially_disabled_bctl_off
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    ; the groups alternate which of the two mask operands is 14 instead of 15
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec

    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
    %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec

    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec

    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...

# old is nonzero identity, masks are partially enabled, bound_ctrl:1 is on:
# the DPP mov result may have 3 different values:
# 1. the active src lane result
# 2. 0 if the src lane is disabled|out of range
# 3. DPP mov's old value if the mov's dest VGPR write is disabled by masks
# can't combine
# NOTE(review): the function name ends in "bctl0" while the movs below pass 1
# as their last operand; presumably the name follows the older bound_ctrl:0
# assembler spelling of the same setting - confirm.

# GCN-LABEL: name: nonzero_old_is_identity_masks_partially_disabled_bctl0
# GCN: %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec

name: nonzero_old_is_identity_masks_partially_disabled_bctl0
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    ; same inputs as the bctl_off variant, but the last DPP operand is 1;
    ; every VALU op is expected to keep its e32 form
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec

    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 1, implicit $exec
    %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec

    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 1, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec

    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 1, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...

# when the DPP source isn't a src0 operand the operation should be commuted if possible
# GCN-LABEL: name: dpp_commute
# GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
# GCN: %16:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %19:vgpr_32 = V_ADD_CO_U32_e32 5, %18, implicit-def $vcc, implicit $exec
name: dpp_commute
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    ; in every group below the DPP mov result is the second source of the VALU op
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e32 %1, %3, implicit $exec

    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
    %7:vgpr_32 = V_AND_B32_e32 %1, %6, implicit $exec

    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e32 %1, %9, implicit $exec

    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e32 %1, %12, implicit $exec

    ; subtract with the DPP mov as second source: the check line expects the
    ; V_SUBREV form after commuting
    %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
    %16:vgpr_32 = V_SUB_CO_U32_e32 %1, %15, implicit-def $vcc, implicit $exec

    ; this cannot be combined because immediate as src0 isn't commutable
    %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %18:vgpr_32 = V_MOV_B32_dpp %17, %0, 1, 14, 15, 0, implicit $exec
    %19:vgpr_32 = V_ADD_CO_U32_e32 5, %18, implicit-def $vcc, implicit $exec
...

---

# check for floating point modifiers
# GCN-LABEL: name: add_f32_e64
# GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec

name: add_f32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; NOTE(review): the V_ADD_F32_e64 operands are presumably src0_modifiers,
    ; src0, src1_modifiers, src1, clamp, omod - confirm against the definition

    ; this shouldn't be combined as omod is set
    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec

    ; this should be combined as all modifiers are default
    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec

    ; this should be combined as modifiers other than abs|neg are default
    %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec

    ; this shouldn't be combined as modifiers aren't abs|neg
    %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
...

# check for e64 modifiers
# GCN-LABEL: name: add_u32_e64
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec

name: add_u32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; this should be combined as all modifiers are default
    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec

    ; this shouldn't be combined as clamp is set
    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
...

# An e64 add whose carry-out register has a user: the check line expects the
# instruction to be left in its e64 form.
# GCN-LABEL: name: add_co_u32_e64
# GCN: %4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, %1, 0, implicit $exec

name: add_co_u32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; this shouldn't be combined as the carry-out is used
    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, %1, 0, implicit $exec

    ; the only purpose of this S_NOP is to read the carry-out %5
    S_NOP 0, implicit %5
...

# tests on sequences of dpp consumers
# GCN-LABEL: name: dpp_seq
# GCN: %4:vgpr_32 = V_ADD_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# broken sequence:
# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec

name: dpp_seq
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

    ; one DPP mov with three users; all three are expected to become DPP ops
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
    %5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec

    ; second DPP mov: the check line expects it to survive, because one of
    ; its users (%9) breaks the sequence
    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %8:vgpr_32 = V_ADD_CO_U32_e32 %7, %1, implicit-def $vcc, implicit $exec
    ; this breaks the sequence
    %9:vgpr_32 = V_SUB_CO_U32_e32 5, %7, implicit-def $vcc, implicit $exec
...

# tests on sequences of dpp consumers followed by control flow
# GCN-LABEL: name: dpp_seq_cf
# GCN: %4:vgpr_32 = V_ADD_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec

name: dpp_seq_cf
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

    ; one DPP mov with three users, all inside this block
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
    %5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec

    ; control flow after the sequence; SI_IF redefines $exec
    %7:sreg_64 = V_CMP_EQ_U32_e64 %5, %6, implicit $exec
    %8:sreg_64 = SI_IF %7, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2

  bb.2:
    SI_END_CF %8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
...

# The zero old value (%2) is defined in bb.0 while the DPP mov and its user are
# in bb.1; the check line expects a combine with old = src1 of the add (%0).
# GCN-LABEL: name: old_in_diff_bb
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %0, %1, %0, 1, 1, 1, 0, implicit $exec

name: old_in_diff_bb
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
...

# old reg def is in diff BB but bound_ctrl:1 - can combine
# GCN-LABEL: name: old_in_diff_bb_bctrl_zero
# GCN: %4:vgpr_32 = V_ADD_U32_dpp {{%[0-9]}}, %0, %1, 1, 15, 15, 1, implicit $exec

name: old_in_diff_bb_bctrl_zero
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    ; zero old value, defined in a different block than the DPP mov
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...

# EXEC mask changed between def and use - cannot combine
# GCN-LABEL: name: exec_changed
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec

name: exec_changed
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
    ; this COPY carries an implicit-def of $exec, placed between two users of %3
    %5:sreg_64 = COPY $exec, implicit-def $exec
    %6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...

# test if $old definition is correctly tracked through subreg manipulation pseudos

# GCN-LABEL: name: mul_old_subreg
# GCN: %7:vgpr_32 = V_MUL_I32_I24_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec

name: mul_old_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    ; sub0 of %4 is the constant 1, sub1 is the constant 42
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
    %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
    %7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
...

# old is read through a subregister that INSERT_SUBREG wrote with a zero (%2);
# the check line expects a combine with old = src1 of the add (%0.sub1).
# GCN-LABEL: name: add_old_subreg
# GCN: %5:vgpr_32 = V_ADD_U32_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec

name: add_old_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
    %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...

# old is read through a subregister the REG_SEQUENCE never defines; the check
# line expects the combined op to carry an undef old operand.
# GCN-LABEL: name: add_old_subreg_undef
# GCN: %5:vgpr_32 = V_ADD_U32_dpp undef %3.sub1, %1, %0.sub1, 1, 15, 15, 1, implicit $exec

name: add_old_subreg_undef
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...

# Test instruction which does not have modifiers in VOP1 form but does in DPP form.
# GCN-LABEL: name: dpp_vop1
# GCN: %3:vgpr_32 = V_CEIL_F32_dpp %0, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_vop1
tracksRegLiveness: true
body: |
  bb.0:
    ; the expected DPP form gains a modifier operand (the 0 after old)
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...

# Test instruction which does not have modifiers in VOP2 form but does in DPP form.
# GCN-LABEL: name: dpp_min
# GCN: %3:vgpr_32 = V_MIN_F32_dpp %0, 0, undef %2:vgpr_32, 0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_min
tracksRegLiveness: true
body: |
  bb.0:
    ; the expected DPP form gains one modifier operand per source (the two 0s)
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
...

# Test an undef old operand
# GCN-LABEL: name: dpp_undef_old
# GCN: %3:vgpr_32 = V_CEIL_F32_dpp undef %1:vgpr_32, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_undef_old
tracksRegLiveness: true
body: |
  bb.0:
    ; old is an undef use here rather than an IMPLICIT_DEF result
    %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...

# Do not combine a dpp mov which writes a physreg.
# GCN-LABEL: name: phys_dpp_mov_dst
# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
name: phys_dpp_mov_dst
tracksRegLiveness: true
body: |
  bb.0:
    ; the mov's destination is the physical register $vgpr0
    $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
...

# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_old_src
# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_old_src
tracksRegLiveness: true
body: |
  bb.0:
    ; the mov's old operand is the physical register $vgpr0
    %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...

# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_src
# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_src
tracksRegLiveness: true
body: |
  bb.0:
    ; the mov's source operand is the physical register $vgpr0
    %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...

# Two DPP movs feed the two halves of a REG_SEQUENCE whose subregisters are
# consumed by an add / add-with-carry pair; the check lines expect both
# consumers to be turned into DPP ops.
# GCN-LABEL: name: dpp_reg_sequence_both_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %9:vgpr_32 = IMPLICIT_DEF
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %6:vgpr_32 = V_ADD_CO_U32_dpp %9, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_both_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    ; both movs use masks 15/15 with last operand 1
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...

# Same shape as a REG_SEQUENCE fed by two DPP movs, but the second mov uses
# masks 1/1: the check lines expect only the sub0 consumer to become a DPP op,
# the second mov to stay, and the REG_SEQUENCE to take an undef sub0 input.
# GCN-LABEL: name: dpp_reg_sequence_first_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_first_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    ; first mov: masks 15/15; second mov: masks 1/1
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...

# REG_SEQUENCE fed by two DPP movs where the first mov uses masks 1/1: the
# check lines expect only the sub1 consumer to become a DPP op, the first mov
# to stay, and the REG_SEQUENCE to take an undef sub1 input.
# GCN-LABEL: name: dpp_reg_sequence_second_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_second_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    ; first mov: masks 1/1; second mov: masks 15/15
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...

# REG_SEQUENCE fed by two DPP movs that both use masks 1/1: the check lines
# expect every instruction to be left as it is (only renumbered).
# GCN-LABEL: name: dpp_reg_sequence_none_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_none_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    ; both movs use masks 1/1
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...

# The DPP movs and the REG_SEQUENCE are in bb.0 while the consumers are in
# bb.1; the check lines expect nothing to be combined.
# NOTE(review): going by the function name, the block boundary is presumably
# treated as a possible EXEC change - confirm against the pass.
# GCN-LABEL: name: dpp_reg_sequence_exec_changed
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: S_BRANCH %bb.1
# GCN: bb.1:
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_exec_changed
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    S_BRANCH %bb.1

  bb.1:
    ; the users of %4 live in a different block than its definition
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...

# The REG_SEQUENCE built from the DPP movs is read by a second REG_SEQUENCE
# through subregister operands before reaching the adds; the check lines
# expect nothing to be combined.
# GCN-LABEL: name: dpp_reg_sequence_subreg
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
# GCN: %7:vgpr_32 = V_ADD_CO_U32_e32 %6.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    ; second REG_SEQUENCE whose inputs are subregister reads of the first one
    %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %8, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %8, implicit-def $vcc, implicit $vcc, implicit $exec
...
# A 64-bit V_MOV_B64_DPP_PSEUDO whose old and src operands are IMPLICIT_DEF
# feeds a 32-bit add / add-with-carry pair through sub0 and sub1. The checks
# expect both adds to become DPP instructions (V_ADD_CO_U32_dpp and
# V_ADDC_U32_dpp) that read sub0 / sub1 of the mov's operands directly.
# GCN-LABEL: name: dpp64_add64_impdef
# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp %1.sub0, %0.sub0, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32 = V_ADDC_U32_dpp %1.sub1, %0.sub1, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp64_add64_impdef
tracksRegLiveness: true
body: |
  bb.0:
    %0:vreg_64 = IMPLICIT_DEF
    %1:vreg_64 = IMPLICIT_DEF
    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO %1:vreg_64, %0:vreg_64, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %4:vgpr_32, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
...

# Same shape as dpp64_add64_impdef, but the operands of the 64-bit DPP mov are
# undef uses rather than IMPLICIT_DEF values. The checks expect both adds to be
# turned into DPP instructions whose old/src operands carry the undef flag on
# the corresponding sub0 / sub1 subregister.
# GCN-LABEL: name: dpp64_add64_undef
# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32 = V_ADDC_U32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp64_add64_undef
tracksRegLiveness: true
body: |
  bb.0:
    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %4:vgpr_32, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
...
# Only the first user of the 64-bit DPP mov is expected to be combined: the
# V_ADD_CO_U32_e32 reading sub0 becomes V_ADD_CO_U32_dpp, while the
# V_ADDC_U32_e64 reading sub1 is expected to stay as is. For the remaining user
# the checks expect a 32-bit V_MOV_B32_dpp on the sub1 halves, placed into a
# REG_SEQUENCE whose sub0 input is undef.
# GCN-LABEL: name: dpp64_add64_first_combined
# GCN: %8:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, 1, 15, 15, 1, implicit $exec
# GCN: %0:vreg_64 = REG_SEQUENCE undef %7:vgpr_32, %subreg.sub0, %8, %subreg.sub1
# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32, dead %6:sreg_64_xexec = V_ADDC_U32_e64 1, %0.sub1, undef $vcc, 0, implicit $exec
name: dpp64_add64_first_combined
tracksRegLiveness: true
body: |
  bb.0:
    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %3:vgpr_32, implicit-def $vcc, implicit $exec
    %5:vgpr_32, dead %6:sreg_64_xexec = V_ADDC_U32_e64 1, %2.sub1, undef $vcc, 0, implicit $exec
...

# The DPP mov result %3 is used by a V_CNDMASK_B32_e64 that also takes an extra
# sreg_64_xexec source (%4). The check expects the V_CNDMASK_B32_e64 to be left
# untouched, i.e. no DPP combine for this instruction.
# GCN-LABEL: name: dont_combine_cndmask_with_src2
# GCN: %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
name: dont_combine_cndmask_with_src2
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:sreg_64_xexec = IMPLICIT_DEF
    %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
...
---

# Make sure flags aren't dropped
# The input V_ADD_F32_e64 carries the nofpexcept and nnan flags; the check
# expects the combined V_ADD_F32_dpp to carry both of them as well.
# GCN-LABEL: name: flags_add_f32_e64
# GCN: %4:vgpr_32 = nnan nofpexcept V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
name: flags_add_f32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
    S_ENDPGM 0, implicit %4

...

# The DPP mov result %2 is used as both source operands of the same
# V_MAX_F32_e64. The check expects the V_MAX_F32_e64 to stay unchanged, i.e. the
# mov is not combined when it feeds more than one operand of its user.
# GCN-LABEL: name: dont_combine_more_than_one_operand
# GCN: %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
name: dont_combine_more_than_one_operand
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
...
# REG_SEQUENCE variant of dont_combine_more_than_one_operand: each add reads
# the same subregister of the DPP-built REG_SEQUENCE (%4) in both of its source
# operands. The checks expect both _e32 adds to stay unchanged.
# GCN-LABEL: name: dont_combine_more_than_one_operand_dpp_reg_sequence
# GCN: %5:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec
# GCN: %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dont_combine_more_than_one_operand_dpp_reg_sequence
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
...

# execMayBeModifiedBeforeAnyUse used to assert if the queried
# V_MOV_B32_dpp was the last instruction in the block.
---
# Loop-shaped input: the V_MOV_B32_dpp is the final instruction of bb.1 and its
# result (%5) is only read by the PHI at the top of bb.1 via the back edge from
# bb.2. The check lines below match the whole function and expect the same
# instructions to come out, i.e. the mov is left in place.
name: mov_dpp_last_block_inst
tracksRegLiveness: true
body: |
  ; GCN-LABEL: name: mov_dpp_last_block_inst
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.1(0x80000000)
  ; GCN-NEXT: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr8
  ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT: successors: %bb.2(0x80000000)
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %5, %bb.2
  ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[PHI]], 323, 15, 15, 0, implicit $exec
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF2]], implicit $exec
  ; GCN-NEXT: V_CMP_NE_U32_e32 1, [[V_CNDMASK_B32_e64_]], implicit-def $vcc, implicit $exec
  ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
  ; GCN-NEXT: S_BRANCH %bb.3
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.3:
  ; GCN-NEXT: S_ENDPGM 0
  bb.0:
    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8

    %0:sgpr_32 = COPY $sgpr8
    %1:vgpr_32 = IMPLICIT_DEF
    %2:sreg_32 = IMPLICIT_DEF
    %3:sreg_64_xexec = IMPLICIT_DEF

  bb.1:
    %4:vgpr_32 = PHI %1, %bb.0, %5, %bb.2
    %5:vgpr_32 = V_MOV_B32_dpp %1, %4, 323, 15, 15, 0, implicit $exec

  bb.2:
    %6:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %3, implicit $exec
    V_CMP_NE_U32_e32 1, %6, implicit-def $vcc, implicit $exec
    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
    S_BRANCH %bb.3

  bb.3:
    S_ENDPGM 0

...