# MIR tests for the si-peephole-sdwa pass applied to a V_ADD_CO_U32_e64 /
# V_ADDC_U32_e64 pair whose add operand comes from a V_AND_B32 with a 255 mask.
# The pass is run through both -run-pass and -passes, on gfx900 and on fiji;
# all four invocations are checked against the same GFX9 prefix.
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s

# test for 3 consecutive _sdwa's
# The body repeats the same and/add/addc/store sequence three times. Each
# V_ADD_CO is expected to become the _sdwa form, directly followed by its
# V_ADDC. The first pair carries nsw/nuw flags, and the checks require those
# flags to still be present on the rewritten instructions.
# GFX9-LABEL: name: test1_add_co_sdwa
# GFX9: = nsw V_ADD_CO_U32_sdwa
# GFX9-NEXT: = nuw V_ADDC_U32_e64
# GFX9: V_ADD_CO_U32_sdwa
# GFX9-NEXT: V_ADDC_U32_e64
# GFX9: V_ADD_CO_U32_sdwa
# GFX9-NEXT: V_ADDC_U32_e64
---
name: test1_add_co_sdwa
tracksRegLiveness: true
registers:
  - { id: 0, class: vgpr_32, preferred-register: '' }
liveins:
  - { reg: '$vgpr0', virtual-reg: '%0' }
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1

    ; First sequence: the add/addc pair carries nsw/nuw flags.
    %1:sgpr_64 = COPY $sgpr0_sgpr1
    %0:vgpr_32 = COPY $vgpr0
    %22:sreg_32_xm0 = S_MOV_B32 255
    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
    %30:vreg_64 = COPY $sgpr0_sgpr1
    %63:vgpr_32, %65:sreg_64_xexec = nsw V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
    %64:vgpr_32, dead %66:sreg_64_xexec = nuw V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))

    ; Second sequence: same shape, fresh virtual registers, no flags.
    %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
    %163:vgpr_32, %165:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %161, 0, implicit $exec
    %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec
    %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1
    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %162, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))

    ; Third sequence: same shape again.
    %171:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
    %173:vgpr_32, %175:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %171, 0, implicit $exec
    %174:vgpr_32, dead %176:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %175, 0, implicit $exec
    %172:vreg_64 = REG_SEQUENCE %173, %subreg.sub0, %174, %subreg.sub1
    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %172, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))

...

# test for VCC interference on sdwa, should generate 1 xform only
# A complete add/addc pair (%163/%164) is placed between the first V_ADD_CO
# (%63) and the V_ADDC that consumes its carry (%65). The checks allow exactly
# one _sdwa add in the whole function and no e32 V_ADDC.
# NOTE(review): presumably the rewritten pair passes its carry through $vcc,
# which is why interleaved pairs cannot both be converted - confirm in the pass.
# GFX9-LABEL: name: test2_add_co_sdwa
# GFX9: V_ADD_CO_U32_sdwa
# GFX9: V_ADDC_U32_e64
# GFX9-NOT: V_ADD_CO_U32_sdwa
# GFX9-NOT: V_ADDC_U32_e32
---
name: test2_add_co_sdwa
tracksRegLiveness: true
registers:
  - { id: 0, class: vgpr_32, preferred-register: '' }
liveins:
  - { reg: '$vgpr0', virtual-reg: '%0' }
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1

    %1:sgpr_64 = COPY $sgpr0_sgpr1
    %0:vgpr_32 = COPY $vgpr0
    %22:sreg_32_xm0 = S_MOV_B32 255
    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
    %30:vreg_64 = COPY $sgpr0_sgpr1
    ; Outer pair, first half: the carry %65 is only consumed after the inner pair.
    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec

    ; Inner pair, placed between the two halves of the outer pair.
    %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
    %163:vgpr_32, %165:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %161, 0, implicit $exec
    %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec
    %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1

    ; Outer pair, second half.
    %64:vgpr_32, dead %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))

    ; Trailing pair; it redefines %161-%166, so this function is not in SSA form.
    %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
    %163:vgpr_32, %165:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %161, 0, implicit $exec
    %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec
    %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1
    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %162, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))

...

# test for CarryOut used, should reject
# The carry-out of V_ADDC (%66) is not dead: it feeds the REG_SEQUENCE. Both
# instructions are expected to stay in their e64 form.
# GFX9-LABEL: name: test3_add_co_sdwa
# GFX9: V_ADD_CO_U32_e64
# GFX9: V_ADDC_U32_e64
# GFX9-NOT: V_ADD_CO_U32_sdwa
# GFX9-NOT: V_ADDC_U32_e32
---
name: test3_add_co_sdwa
tracksRegLiveness: true
registers:
  - { id: 0, class: vgpr_32, preferred-register: '' }
liveins:
  - { reg: '$vgpr0', virtual-reg: '%0' }
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1

    %1:sgpr_64 = COPY $sgpr0_sgpr1
    %0:vgpr_32 = COPY $vgpr0
    %22:sreg_32_xm0 = S_MOV_B32 255
    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
    %30:vreg_64 = COPY $sgpr0_sgpr1
    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
    ; %66, the carry-out of V_ADDC, is consumed here instead of %64.
    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %66, %subreg.sub1
    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))

...

# test for CarryIn used more than once, should reject
# The carry produced by V_ADD_CO (%65) has two readers: the V_ADDC carry-in and
# the REG_SEQUENCE. Both instructions are expected to stay in their e64 form.
# GFX9-LABEL: name: test4_add_co_sdwa
# GFX9: V_ADD_CO_U32_e64
# GFX9: V_ADDC_U32_e64
# GFX9-NOT: V_ADD_CO_U32_sdwa
# GFX9-NOT: V_ADDC_U32_e32
---
name: test4_add_co_sdwa
tracksRegLiveness: true
registers:
  - { id: 0, class: vgpr_32, preferred-register: '' }
liveins:
  - { reg: '$vgpr0', virtual-reg: '%0' }
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1

    %1:sgpr_64 = COPY $sgpr0_sgpr1
    %0:vgpr_32 = COPY $vgpr0
    %22:sreg_32_xm0 = S_MOV_B32 255
    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
    %30:vreg_64 = COPY $sgpr0_sgpr1
    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
    ; Second use of the carry %65, in addition to the V_ADDC carry-in above.
    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %65, %subreg.sub1
    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))


...

# test for simple example, should generate sdwa
# A single add/addc pair. Unlike the first test in this file, the V_ADDC
# carry-out (%66) is not marked dead and the carry-in (%65) is not marked
# killed; the checks still expect the add to become the _sdwa form.
# GFX9-LABEL: name: test5_add_co_sdwa
# GFX9: V_ADD_CO_U32_sdwa
# GFX9: V_ADDC_U32_e64
---
name: test5_add_co_sdwa
tracksRegLiveness: true
registers:
  - { id: 0, class: vgpr_32, preferred-register: '' }
liveins:
  - { reg: '$vgpr0', virtual-reg: '%0' }
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1

    %1:sgpr_64 = COPY $sgpr0_sgpr1
    %0:vgpr_32 = COPY $vgpr0
    %22:sreg_32_xm0 = S_MOV_B32 255
    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
    %30:vreg_64 = COPY $sgpr0_sgpr1
    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))


...

# test for V_ADD_CO_U32_e64 only, should reject
# The function contains a V_ADD_CO but no V_ADDC, so the carry %65 has no
# reader. The add is expected to stay in its e64 form.
# GFX9-LABEL: name: test6_add_co_sdwa
# GFX9: V_ADD_CO_U32_e64
# GFX9-NOT: V_ADD_CO_U32_sdwa
# GFX9-NOT: V_ADDC_U32_e32
---
name: test6_add_co_sdwa
tracksRegLiveness: true
registers:
  - { id: 0, class: vgpr_32, preferred-register: '' }
liveins:
  - { reg: '$vgpr0', virtual-reg: '%0' }
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1

    %1:sgpr_64 = COPY $sgpr0_sgpr1
    %0:vgpr_32 = COPY $vgpr0
    %22:sreg_32_xm0 = S_MOV_B32 255
    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
    %30:vreg_64 = COPY $sgpr0_sgpr1
    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
    ; The high half is filled from %23 because there is no V_ADDC result.
    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %23, %subreg.sub1
    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))


...

# test for V_ADDC_U32_e64 only, should reject
# The function contains a V_ADDC but no V_ADD_CO; the carry-in %24 is a plain
# copy of $sgpr0_sgpr1. The V_ADDC is expected to stay in its e64 form.
# GFX9-LABEL: name: test7_add_co_sdwa
# GFX9: V_ADDC_U32_e64
# GFX9-NOT: V_ADD_CO_U32_sdwa
# GFX9-NOT: V_ADDC_U32_e32
---
name: test7_add_co_sdwa
tracksRegLiveness: true
registers:
  - { id: 0, class: vgpr_32, preferred-register: '' }
liveins:
  - { reg: '$vgpr0', virtual-reg: '%0' }
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1

    %1:sgpr_64 = COPY $sgpr0_sgpr1
    %0:vgpr_32 = COPY $vgpr0
    %22:sreg_32_xm0 = S_MOV_B32 255
    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
    ; Stand-in carry-in value; it is not produced by an add in this function.
    %24:sreg_64_xexec = COPY $sgpr0_sgpr1

    %30:vreg_64 = COPY $sgpr0_sgpr1
    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %24, 0, implicit $exec
    %62:vreg_64 = REG_SEQUENCE %23, %subreg.sub0, %23, %subreg.sub1
    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))


...

# test for $vcc defined between two adds, should not generate
# $vcc is written between V_ADD_CO and V_ADDC and read back after V_ADDC.
# NOTE(review): presumably the rewritten pair would pass its carry through
# $vcc and clobber this value - confirm in the pass.
# GFX9-LABEL: name: test8_add_co_sdwa
# GFX9-NOT: V_ADD_CO_U32_sdwa
# GFX9: V_ADDC_U32_e64
---
name: test8_add_co_sdwa
tracksRegLiveness: true
registers:
  - { id: 0, class: vgpr_32, preferred-register: '' }
liveins:
  - { reg: '$vgpr0', virtual-reg: '%0' }
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1

    %1:sgpr_64 = COPY $sgpr0_sgpr1
    %0:vgpr_32 = COPY $vgpr0
    %22:sreg_32_xm0 = S_MOV_B32 255
    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
    %30:vreg_64 = COPY $sgpr0_sgpr1
    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
    ; $vcc is defined between the two halves of the pair ...
    $vcc = COPY %30
    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
    ; ... and read after the pair, so its value must survive the V_ADDC.
    %31:vreg_64 = COPY $vcc
    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
    GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))


...

# test for non dead $vcc, should not generate
# $vcc is written before the pair and read after it, so it is live across both
# V_ADD_CO and V_ADDC.
# GFX9-LABEL: name: test9_add_co_sdwa
# GFX9-NOT: V_ADD_CO_U32_sdwa
# GFX9: V_ADDC_U32_e64
---
name: test9_add_co_sdwa
tracksRegLiveness: true
registers:
  - { id: 0, class: vgpr_32, preferred-register: '' }
liveins:
  - { reg: '$vgpr0', virtual-reg: '%0' }
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1

    %1:sgpr_64 = COPY $sgpr0_sgpr1
    %0:vgpr_32 = COPY $vgpr0
    %22:sreg_32_xm0 = S_MOV_B32 255
    %30:vreg_64 = COPY $sgpr0_sgpr1
    ; $vcc is defined ahead of the whole and/add/addc sequence ...
    $vcc = COPY %30
    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
    ; ... and read only after it.
    %31:vreg_64 = COPY $vcc
    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
    GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))


...

# test for def $vcc_lo, should not generate
# Only the low half of $vcc is involved: $vcc_lo is written before the pair and
# read between V_ADD_CO and V_ADDC.
# GFX9-LABEL: name: test10_add_co_sdwa
# GFX9-NOT: V_ADD_CO_U32_sdwa
# GFX9: V_ADDC_U32_e64
---
name: test10_add_co_sdwa
tracksRegLiveness: true
registers:
  - { id: 0, class: vgpr_32, preferred-register: '' }
liveins:
  - { reg: '$vgpr0', virtual-reg: '%0' }
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1

    %1:sgpr_64 = COPY $sgpr0_sgpr1
    %0:vgpr_32 = COPY $vgpr0
    %22:sreg_32_xm0 = S_MOV_B32 255
    %30:vreg_64 = COPY $sgpr0_sgpr1
    ; Write the low half of $vcc before the pair ...
    $vcc_lo = COPY %30.sub0
    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
    ; ... and read it back between the add and the addc.
    %31:vgpr_32 = COPY $vcc_lo
    %32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1
    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
    GLOBAL_STORE_DWORDX2_SADDR %32.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))

...

# test for read $vcc_hi, should not generate
# Only the high half of $vcc is involved: $vcc_hi is written before the pair
# and read between V_ADD_CO and V_ADDC.
# GFX9-LABEL: name: test11_add_co_sdwa
# GFX9-NOT: V_ADD_CO_U32_sdwa
# GFX9: V_ADDC_U32_e64
---
name: test11_add_co_sdwa
tracksRegLiveness: true
registers:
  - { id: 0, class: vgpr_32, preferred-register: '' }
liveins:
  - { reg: '$vgpr0', virtual-reg: '%0' }
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1

    %1:sgpr_64 = COPY $sgpr0_sgpr1
    %0:vgpr_32 = COPY $vgpr0
    %22:sreg_32_xm0 = S_MOV_B32 255
    %30:vreg_64 = COPY $sgpr0_sgpr1
    ; Write the high half of $vcc before the pair ...
    $vcc_hi = COPY %30.sub0
    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
    ; ... and read it back between the add and the addc.
    %31:vgpr_32 = COPY $vcc_hi
    %32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1
    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
    GLOBAL_STORE_DWORDX2_SADDR %32.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))

...

# test for $vcc defined and used between adds, should not generate
# $vcc is both written and killed between V_ADD_CO and V_ADDC, so it is not
# live at either instruction - only in the gap between them.
# GFX9-LABEL: name: test12_add_co_sdwa
# GFX9-NOT: V_ADD_CO_U32_sdwa
# GFX9: V_ADDC_U32_e64
---
name: test12_add_co_sdwa
tracksRegLiveness: true
registers:
  - { id: 0, class: vgpr_32, preferred-register: '' }
liveins:
  - { reg: '$vgpr0', virtual-reg: '%0' }
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1

    %1:sgpr_64 = COPY $sgpr0_sgpr1
    %0:vgpr_32 = COPY $vgpr0
    %22:sreg_32_xm0 = S_MOV_B32 255
    %30:vreg_64 = COPY $sgpr0_sgpr1
    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
    ; The whole live range of $vcc sits between the add and the addc.
    $vcc = COPY %30
    %31:vreg_64 = COPY killed $vcc
    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
    GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))
