# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
#
# The llc invocation above runs only the si-wqm pass (-run-pass) over the
# hand-written MIR functions in this file, for amdgcn/fiji, and pipes the
# resulting MIR into FileCheck. The patterns for each function are the
# '#'-prefixed directive lines placed just before that function's 'name:' key.
#
# The first YAML document holds one empty LLVM IR stub per MIR function. The
# stub is where each function's calling convention (amdgpu_ps, amdgpu_cs,
# amdgpu_es, ...) is declared.

--- |
  define amdgpu_ps void @test_strict_wwm_scc() {
    ret void
  }
  define amdgpu_ps void @test_strict_wwm_scc2() {
    ret void
  }
  define amdgpu_ps void @no_cfg() {
    ret void
  }
  define amdgpu_ps void @copy_exec() {
    ret void
  }
  define amdgpu_ps void @scc_always_live() {
    ret void
  }
  define amdgpu_ps void @test_wwm_set_inactive_propagation() {
    ret void
  }
  define amdgpu_ps void @test_wqm_lr_phi() {
    ret void
  }
  define amdgpu_cs void @no_wqm_in_cs() {
    ret void
  }
  define amdgpu_es void @no_wqm_in_es() {
    ret void
  }
  define amdgpu_gs void @no_wqm_in_gs() {
    ret void
  }
  define amdgpu_hs void @no_wqm_in_hs() {
    ret void
  }
  define amdgpu_ls void @no_wqm_in_ls() {
    ret void
  }
  define amdgpu_vs void @no_wqm_in_vs() {
    ret void
  }
...
---
# NOTE(review): the '---' above opens a YAML document with no content before
# the next '---' - confirm this empty document is intentional.

---
# Check for awareness that s_or_saveexec_b64 clobbers SCC
#
# S_CMP_LT_I32 defines $scc and S_CSELECT_B32 reads it; a V_ADD_CO_U32_e32
# that feeds the STRICT_WWM value sits between the two. The patterns require
# ENTER_STRICT_WWM to appear before the compare.
#
#CHECK: ENTER_STRICT_WWM
#CHECK: S_CMP_LT_I32
#CHECK: S_CSELECT_B32
name: test_strict_wwm_scc
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sgpr_32, preferred-register: '' }
  - { id: 1, class: sgpr_32, preferred-register: '' }
  - { id: 2, class: sgpr_32, preferred-register: '' }
  - { id: 3, class: vgpr_32, preferred-register: '' }
  - { id: 4, class: vgpr_32, preferred-register: '' }
  - { id: 5, class: sgpr_32, preferred-register: '' }
  - { id: 6, class: vgpr_32, preferred-register: '' }
  - { id: 7, class: vgpr_32, preferred-register: '' }
  - { id: 8, class: sreg_32_xm0, preferred-register: '' }
  - { id: 9, class: sreg_32, preferred-register: '' }
  - { id: 10, class: sreg_32, preferred-register: '' }
  - { id: 11, class: vgpr_32, preferred-register: '' }
  - { id: 12, class: vgpr_32, preferred-register: '' }
liveins:
  - { reg: '$sgpr0', virtual-reg: '%0' }
  - { reg: '$sgpr1', virtual-reg: '%1' }
  - { reg: '$sgpr2', virtual-reg: '%2' }
  - { reg: '$vgpr0', virtual-reg: '%3' }
body: |
  bb.0:
    liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0

    %3 = COPY $vgpr0
    %2 = COPY $sgpr2
    %1 = COPY $sgpr1
    %0 = COPY $sgpr0
    S_CMP_LT_I32 0, %0, implicit-def $scc
    %12 = V_ADD_CO_U32_e32 %3, %3, implicit-def $vcc, implicit $exec
    %5 = S_CSELECT_B32 %2, %1, implicit $scc
    %11 = V_ADD_CO_U32_e32 %5, %12, implicit-def $vcc, implicit $exec
    $vgpr0 = STRICT_WWM %11, implicit $exec
    SI_RETURN_TO_EPILOG $vgpr0

...

---
# Second test for awareness that s_or_saveexec_b64 clobbers SCC,
# needed because the entry block is treated differently.
#
# Here the compare and the S_CSELECT_B32 sit in bb.1 instead of the entry
# block. The patterns expect $scc to be copied out before ENTER_STRICT_WWM and
# copied back afterwards, ahead of the S_CSELECT_B32 that reads it.
#
#CHECK: %bb.1
#CHECK: S_CMP_LT_I32
#CHECK: COPY $scc
#CHECK: ENTER_STRICT_WWM
#CHECK: $scc = COPY
#CHECK: S_CSELECT_B32
name: test_strict_wwm_scc2
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0

    %3:vgpr_32 = COPY $vgpr0
    %2:sgpr_32 = COPY $sgpr2
    %1:sgpr_32 = COPY $sgpr1
    %0:sgpr_32 = COPY $sgpr0
    %13:sgpr_128 = IMPLICIT_DEF

  bb.1:
    S_CMP_LT_I32 0, %0:sgpr_32, implicit-def $scc
    %10:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %3:vgpr_32, %13:sgpr_128, 0, 0, 0, 0, implicit $exec
    %12:vgpr_32 = V_ADD_CO_U32_e32 %3:vgpr_32, %3:vgpr_32, implicit-def $vcc, implicit $exec
    %5:sgpr_32 = S_CSELECT_B32 %2:sgpr_32, %1:sgpr_32, implicit $scc
    %11:vgpr_32 = V_ADD_CO_U32_e32 %5:sgpr_32, %12:vgpr_32, implicit-def $vcc, implicit $exec
    $vgpr0 = STRICT_WWM %11:vgpr_32, implicit $exec
    $vgpr1 = COPY %10:vgpr_32
    SI_RETURN_TO_EPILOG $vgpr0, $vgpr1

...

---
# V_SET_INACTIVE, when its second operand is undef, is replaced by a
# COPY by si-wqm. Ensure the instruction is removed.
#CHECK-NOT: V_SET_INACTIVE
# The body of no_cfg is a single block (bb.0). Its V_SET_INACTIVE_B32 takes
# 'undef %12' as an operand, and its result flows through V_MOV_B32_dpp into
# STRICT_WWM.
name: no_cfg
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers:
  - { id: 0, class: sgpr_32, preferred-register: '' }
  - { id: 1, class: sgpr_32, preferred-register: '' }
  - { id: 2, class: sgpr_32, preferred-register: '' }
  - { id: 3, class: sgpr_32, preferred-register: '' }
  - { id: 4, class: sgpr_32, preferred-register: '' }
  - { id: 5, class: sgpr_128, preferred-register: '' }
  - { id: 6, class: sgpr_128, preferred-register: '' }
  - { id: 7, class: sreg_32, preferred-register: '' }
  - { id: 8, class: vreg_64, preferred-register: '' }
  - { id: 9, class: sreg_32, preferred-register: '' }
  - { id: 10, class: vgpr_32, preferred-register: '' }
  - { id: 11, class: vgpr_32, preferred-register: '' }
  - { id: 12, class: vgpr_32, preferred-register: '' }
  - { id: 13, class: vgpr_32, preferred-register: '' }
  - { id: 14, class: vgpr_32, preferred-register: '' }
  - { id: 15, class: vgpr_32, preferred-register: '' }
  - { id: 16, class: vgpr_32, preferred-register: '' }
liveins:
  - { reg: '$sgpr0', virtual-reg: '%0' }
  - { reg: '$sgpr1', virtual-reg: '%1' }
  - { reg: '$sgpr2', virtual-reg: '%2' }
  - { reg: '$sgpr3', virtual-reg: '%3' }
body: |
  bb.0:
    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3

    %3:sgpr_32 = COPY $sgpr3
    %2:sgpr_32 = COPY $sgpr2
    %1:sgpr_32 = COPY $sgpr1
    %0:sgpr_32 = COPY $sgpr0
    %6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
    %5:sgpr_128 = COPY %6
    %7:sreg_32 = S_MOV_B32 0
    %8:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %6, %7, 0, 0, 0, implicit $exec
    %16:vgpr_32 = COPY %8.sub1
    %11:vgpr_32 = COPY %16
    %17:sreg_64_xexec = IMPLICIT_DEF
    %10:vgpr_32 = V_SET_INACTIVE_B32 0, %11, 0, undef %12, undef %17, implicit $exec, implicit-def $scc
    %14:vgpr_32 = COPY %7
    %13:vgpr_32 = V_MOV_B32_dpp %14, killed %10, 323, 12, 15, 0, implicit $exec
    early-clobber %15:vgpr_32 = STRICT_WWM killed %13, implicit $exec
    BUFFER_STORE_DWORD_OFFSET_exact killed %15, %6, %7, 4, 0, 0, implicit $exec
    S_ENDPGM 0

...

---
# Ensure that strict_wwm is not put around an EXEC copy
#
# The patterns pin the exact shape: the COPY of $exec comes first, then only
# the V_MOV_B32_e32 is bracketed by ENTER_STRICT_WWM / EXIT_STRICT_WWM, and
# V_MBCNT_LO_U32_B32_e64 follows immediately after the exit.
# NOTE(review): the patterns name %7/%8/%9 where the input below uses
# %8/%9/%10; presumably virtual registers are renumbered on parse because %7
# is never used in the input - confirm.
#CHECK-LABEL: name: copy_exec
#CHECK: %7:sreg_64 = COPY $exec
#CHECK-NEXT: %13:sreg_64 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
#CHECK-NEXT: %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
#CHECK-NEXT: $exec = EXIT_STRICT_WWM %13
#CHECK-NEXT: %9:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %7.sub0, 0, implicit $exec
name: copy_exec
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3

    %3:sgpr_32 = COPY $sgpr3
    %2:sgpr_32 = COPY $sgpr2
    %1:sgpr_32 = COPY $sgpr1
    %0:sgpr_32 = COPY $sgpr0
    %4:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
    %5:sreg_32 = S_MOV_B32 0
    %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %4, %5, 0, 0, 0, implicit $exec

    %8:sreg_64 = COPY $exec
    %9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %10:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %8.sub0:sreg_64, 0, implicit $exec
    %11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
    %12:sreg_32 = V_READLANE_B32 %11:vgpr_32, 63
    early-clobber %13:vgpr_32 = STRICT_WWM %9:vgpr_32, implicit $exec

    BUFFER_STORE_DWORD_OFFSET_exact killed %13, %4, %5, 4, 0, 0, implicit $exec
    S_ENDPGM 0

...

---
# Check exit of WQM is still inserted correctly when SCC is live until block end.
# Critically this tests that compilation does not fail.
#CHECK-LABEL: name: scc_always_live
#CHECK: %8:vreg_128 = IMAGE_SAMPLE_V4_V2 %7
#CHECK-NEXT: S_CMP_EQ_U32 %2, 0, implicit-def $scc
#CHECK-NEXT: undef %9.sub0:vreg_64 = nsz arcp nofpexcept V_ADD_F32_e64
#CHECK-NEXT: %9.sub1:vreg_64 = nsz arcp nofpexcept V_MUL_F32_e32
#CHECK-NEXT: %14:sreg_32_xm0 = COPY $scc
#CHECK-NEXT: $exec = S_AND_B64 $exec, %13, implicit-def $scc
#CHECK-NEXT: $scc = COPY %14
#CHECK-NEXT: %10:vgpr_32 = nsz arcp nofpexcept V_ADD_F32_e64
#CHECK-NEXT: %11:vreg_128 = IMAGE_SAMPLE_V4_V2
#CHECK-NEXT: S_CBRANCH_SCC0 %bb.2
# In the input, S_CMP_EQ_U32 defines $scc part-way through bb.0 and
# S_CBRANCH_SCC0 consumes it as the last instruction of the block. The
# patterns above expect $scc to be saved with a COPY before the S_AND_B64 on
# $exec (which itself defines $scc) and restored directly after it.
name: scc_always_live
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr1, $sgpr2, $vgpr1, $vgpr2

    $m0 = COPY $sgpr1
    %0:vgpr_32 = COPY $vgpr1
    %1:vgpr_32 = COPY $vgpr2
    %8:sgpr_32 = COPY $sgpr2
    %100:sgpr_256 = IMPLICIT_DEF
    %101:sgpr_128 = IMPLICIT_DEF

    %2:vgpr_32 = V_INTERP_P1_F32 %0:vgpr_32, 3, 2, implicit $mode, implicit $m0, implicit $exec
    %3:vgpr_32 = V_INTERP_P1_F32 %1:vgpr_32, 3, 2, implicit $mode, implicit $m0, implicit $exec

    undef %7.sub0:vreg_64 = COPY %2:vgpr_32
    %7.sub1:vreg_64 = COPY %3:vgpr_32

    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %7:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
    S_CMP_EQ_U32 %8:sgpr_32, 0, implicit-def $scc

    undef %5.sub0:vreg_64 = nsz arcp nofpexcept V_ADD_F32_e64 0, %4.sub0:vreg_128, 0, %3:vgpr_32, 1, 0, implicit $mode, implicit $exec
    %5.sub1:vreg_64 = nsz arcp nofpexcept V_MUL_F32_e32 %2, %3, implicit $mode, implicit $exec
    %6:vgpr_32 = nsz arcp nofpexcept V_ADD_F32_e64 0, %2:vgpr_32, 0, %3:vgpr_32, 1, 0, implicit $mode, implicit $exec

    %9:vreg_128 = IMAGE_SAMPLE_V4_V2 %5:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)

    S_CBRANCH_SCC0 %bb.2, implicit $scc

  bb.1:
    %10:sreg_32 = S_MOV_B32 0
    BUFFER_STORE_DWORD_OFFSET_exact %6:vgpr_32, %101:sgpr_128, %10:sreg_32, 4, 0, 0, implicit $exec
    S_ENDPGM 0

  bb.2:
    $vgpr0 = COPY %4.sub0:vreg_128
    $vgpr1 = COPY %4.sub1:vreg_128
    $vgpr2 = COPY %9.sub0:vreg_128
    $vgpr3 = COPY %9.sub1:vreg_128
    SI_RETURN_TO_EPILOG $vgpr0, $vgpr1, $vgpr2, $vgpr3
...

---
# Check that unnecessary instructions do not get marked for WWM
#
# The patterns allow no ENTER_STRICT_WWM ahead of the BUFFER_LOAD_DWORDX2,
# require one ahead of the two V_SET_INACTIVE_B32 instructions, and forbid
# another one between the second V_SET_INACTIVE_B32 and V_MAX.
#
#CHECK-NOT: ENTER_STRICT_WWM
#CHECK: BUFFER_LOAD_DWORDX2
#CHECK: ENTER_STRICT_WWM
#CHECK: V_SET_INACTIVE_B32
#CHECK: V_SET_INACTIVE_B32
#CHECK-NOT: ENTER_STRICT_WWM
#CHECK: V_MAX
name: test_wwm_set_inactive_propagation
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
    %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:vgpr_32 = COPY $vgpr0
    %2:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN %1:vgpr_32, %0:sgpr_128, 0, 0, 0, 0, implicit $exec
    %4:sreg_64_xexec = IMPLICIT_DEF
    %2.sub0:vreg_64 = V_SET_INACTIVE_B32 0, %2.sub0:vreg_64, 0, 0, undef %4, implicit $exec, implicit-def $scc
    %2.sub1:vreg_64 = V_SET_INACTIVE_B32 0, %2.sub1:vreg_64, 0, 0, undef %4, implicit $exec, implicit-def $scc
    %3:vreg_64 = nnan nsz arcp contract reassoc nofpexcept V_MAX_F64_e64 0, %2:vreg_64, 0, %2:vreg_64, 0, 0, implicit $mode, implicit $exec
    $vgpr0 = STRICT_WWM %3.sub0:vreg_64, implicit $exec
    $vgpr1 = STRICT_WWM %3.sub1:vreg_64, implicit $exec
    SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
...

---
# Check that WQM marking occurs correctly through phi nodes in live range graph.
# If not then initial V_MOV will not be in WQM.
#
# In the input, the vreg_64 %0 is defined by two V_MOV_B32_e32 in bb.0, has
# its halves redefined in bb.2 and bb.3, and is only consumed by the
# IMAGE_SAMPLE_V4_V2 in bb.4. The patterns require S_WQM to be emitted
# directly after a COPY of $exec and directly before those two V_MOV_B32_e32.
#CHECK-LABEL: name: test_wqm_lr_phi
#CHECK: COPY $exec
#CHECK-NEXT: S_WQM
#CHECK-NEXT: V_MOV_B32_e32 -10
#CHECK-NEXT: V_MOV_B32_e32 0
name: test_wqm_lr_phi
tracksRegLiveness: true
body: |
  bb.0:
    undef %0.sub0:vreg_64 = V_MOV_B32_e32 -10, implicit $exec
    %0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
    %1:sreg_64 = S_GETPC_B64
    %2:sgpr_256 = S_LOAD_DWORDX8_IMM %1:sreg_64, 32, 0

  bb.1:
    $vcc = V_CMP_LT_U32_e64 4, 4, implicit $exec
    S_CBRANCH_VCCNZ %bb.3, implicit $vcc
    S_BRANCH %bb.2

  bb.2:
    %0.sub0:vreg_64 = V_ADD_U32_e32 1, %0.sub1, implicit $exec
    S_BRANCH %bb.3

  bb.3:
    %0.sub1:vreg_64 = V_ADD_U32_e32 1, %0.sub1, implicit $exec
    S_BRANCH %bb.4

  bb.4:
    %3:sgpr_128 = IMPLICIT_DEF
    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %2:sgpr_256, %3:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
    $vgpr0 = COPY %4.sub0:vreg_128
    $vgpr1 = COPY %4.sub1:vreg_128
    SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
...

---
# no_wqm_in_cs: an IMAGE_SAMPLE_V4_V2 in a function whose IR stub (first YAML
# document of this file) uses the amdgpu_cs calling convention. The pattern
# requires that no S_WQM appears in the output for this function.
# NOTE(review): presumably WQM is only expected for amdgpu_ps functions -
# confirm against the pass implementation.
#CHECK-LABEL: name: no_wqm_in_cs
#CHECK-NOT: S_WQM
name: no_wqm_in_cs
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr1, $vgpr2

    undef %0.sub0:vreg_64 = COPY $vgpr1
    %0.sub1:vreg_64 = COPY $vgpr2
    %100:sgpr_256 = IMPLICIT_DEF
    %101:sgpr_128 = IMPLICIT_DEF

    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
...

---
# no_wqm_in_es: same IMAGE_SAMPLE_V4_V2 body as the other no_wqm_in_* tests;
# the IR stub in the first YAML document of this file declares the function
# as amdgpu_es. The pattern requires that no S_WQM is emitted for it.
#CHECK-LABEL: name: no_wqm_in_es
#CHECK-NOT: S_WQM
name: no_wqm_in_es
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr1, $vgpr2

    undef %0.sub0:vreg_64 = COPY $vgpr1
    %0.sub1:vreg_64 = COPY $vgpr2
    %100:sgpr_256 = IMPLICIT_DEF
    %101:sgpr_128 = IMPLICIT_DEF

    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
...

---
# no_wqm_in_gs: as above, with the IR stub declared as amdgpu_gs.
#CHECK-LABEL: name: no_wqm_in_gs
#CHECK-NOT: S_WQM
name: no_wqm_in_gs
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr1, $vgpr2

    undef %0.sub0:vreg_64 = COPY $vgpr1
    %0.sub1:vreg_64 = COPY $vgpr2
    %100:sgpr_256 = IMPLICIT_DEF
    %101:sgpr_128 = IMPLICIT_DEF

    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
...

---
# no_wqm_in_hs: as above, with the IR stub declared as amdgpu_hs.
#CHECK-LABEL: name: no_wqm_in_hs
#CHECK-NOT: S_WQM
name: no_wqm_in_hs
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr1, $vgpr2

    undef %0.sub0:vreg_64 = COPY $vgpr1
    %0.sub1:vreg_64 = COPY $vgpr2
    %100:sgpr_256 = IMPLICIT_DEF
    %101:sgpr_128 = IMPLICIT_DEF

    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
...

---
# no_wqm_in_ls: as above, with the IR stub declared as amdgpu_ls.
#CHECK-LABEL: name: no_wqm_in_ls
#CHECK-NOT: S_WQM
name: no_wqm_in_ls
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr1, $vgpr2

    undef %0.sub0:vreg_64 = COPY $vgpr1
    %0.sub1:vreg_64 = COPY $vgpr2
    %100:sgpr_256 = IMPLICIT_DEF
    %101:sgpr_128 = IMPLICIT_DEF

    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
...

---
# no_wqm_in_vs: an IMAGE_SAMPLE_V4_V2 in a function whose IR stub (first YAML
# document of this file) uses the amdgpu_vs calling convention. The pattern
# requires that no S_WQM appears in the output for this function.
#CHECK-LABEL: name: no_wqm_in_vs
#CHECK-NOT: S_WQM
name: no_wqm_in_vs
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr1, $vgpr2

    undef %0.sub0:vreg_64 = COPY $vgpr1
    %0.sub1:vreg_64 = COPY $vgpr2
    %100:sgpr_256 = IMPLICIT_DEF
    %101:sgpr_128 = IMPLICIT_DEF

    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
...
