# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s

--- |
  define amdgpu_ps void @test_strict_wwm_scc() {
    ret void
  }
  define amdgpu_ps void @test_strict_wwm_scc2() {
    ret void
  }
  define amdgpu_ps void @no_cfg() {
    ret void
  }
  define amdgpu_ps void @copy_exec() {
    ret void
  }
  define amdgpu_ps void @scc_always_live() {
    ret void
  }
  define amdgpu_ps void @test_wwm_set_inactive_propagation() {
    ret void
  }
  define amdgpu_ps void @test_wqm_lr_phi() {
    ret void
  }
  define amdgpu_cs void @no_wqm_in_cs() {
    ret void
  }
  define amdgpu_es void @no_wqm_in_es() {
    ret void
  }
  define amdgpu_gs void @no_wqm_in_gs() {
    ret void
  }
  define amdgpu_hs void @no_wqm_in_hs() {
    ret void
  }
  define amdgpu_ls void @no_wqm_in_ls() {
    ret void
  }
  define amdgpu_vs void @no_wqm_in_vs() {
    ret void
  }
...
---

---
# Check for awareness that s_or_saveexec_b64 clobbers SCC
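# (Note: ENTER_STRICT_WWM is assumed to be lowered to s_or_saveexec_b64 later, so the
# checks below require WWM entry to be placed before the S_CMP_LT_I32/S_CSELECT_B32
# pair rather than between the SCC def and its use.)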
#
#CHECK: ENTER_STRICT_WWM
#CHECK: S_CMP_LT_I32
#CHECK: S_CSELECT_B32
name:            test_strict_wwm_scc
alignment:       1
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
registers:
  - { id: 0, class: sgpr_32, preferred-register: '' }
  - { id: 1, class: sgpr_32, preferred-register: '' }
  - { id: 2, class: sgpr_32, preferred-register: '' }
  - { id: 3, class: vgpr_32, preferred-register: '' }
  - { id: 4, class: vgpr_32, preferred-register: '' }
  - { id: 5, class: sgpr_32, preferred-register: '' }
  - { id: 6, class: vgpr_32, preferred-register: '' }
  - { id: 7, class: vgpr_32, preferred-register: '' }
  - { id: 8, class: sreg_32_xm0, preferred-register: '' }
  - { id: 9, class: sreg_32, preferred-register: '' }
  - { id: 10, class: sreg_32, preferred-register: '' }
  - { id: 11, class: vgpr_32, preferred-register: '' }
  - { id: 12, class: vgpr_32, preferred-register: '' }
liveins:
  - { reg: '$sgpr0', virtual-reg: '%0' }
  - { reg: '$sgpr1', virtual-reg: '%1' }
  - { reg: '$sgpr2', virtual-reg: '%2' }
  - { reg: '$vgpr0', virtual-reg: '%3' }
body:             |
  bb.0:
    liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0

    %3 = COPY $vgpr0
    %2 = COPY $sgpr2
    %1 = COPY $sgpr1
    %0 = COPY $sgpr0
    S_CMP_LT_I32 0, %0, implicit-def $scc
    %12 = V_ADD_CO_U32_e32 %3, %3, implicit-def $vcc, implicit $exec
    %5 = S_CSELECT_B32 %2, %1, implicit $scc
    %11 = V_ADD_CO_U32_e32 %5, %12, implicit-def $vcc, implicit $exec
    $vgpr0 = STRICT_WWM %11, implicit $exec
    SI_RETURN_TO_EPILOG $vgpr0

...

---
# Second test for awareness that s_or_saveexec_b64 clobbers SCC,
# because the entry block is treated differently.
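# (Note: SCC is live across the WWM entry in a non-entry block here, so the checks
# below expect $scc to be copied to an SGPR before ENTER_STRICT_WWM and restored
# before the S_CSELECT_B32 that consumes it.)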
#
#CHECK: %bb.1
#CHECK: S_CMP_LT_I32
#CHECK: COPY $scc
#CHECK: ENTER_STRICT_WWM
#CHECK: $scc = COPY
#CHECK: S_CSELECT_B32
name:            test_strict_wwm_scc2
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0

    %3:vgpr_32 = COPY $vgpr0
    %2:sgpr_32 = COPY $sgpr2
    %1:sgpr_32 = COPY $sgpr1
    %0:sgpr_32 = COPY $sgpr0
    %13:sgpr_128 = IMPLICIT_DEF

  bb.1:
    S_CMP_LT_I32 0, %0:sgpr_32, implicit-def $scc
    %10:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %3:vgpr_32, %13:sgpr_128, 0, 0, 0, 0, implicit $exec
    %12:vgpr_32 = V_ADD_CO_U32_e32 %3:vgpr_32, %3:vgpr_32, implicit-def $vcc, implicit $exec
    %5:sgpr_32 = S_CSELECT_B32 %2:sgpr_32, %1:sgpr_32, implicit $scc
    %11:vgpr_32 = V_ADD_CO_U32_e32 %5:sgpr_32, %12:vgpr_32, implicit-def $vcc, implicit $exec
    $vgpr0 = STRICT_WWM %11:vgpr_32, implicit $exec
    $vgpr1 = COPY %10:vgpr_32
    SI_RETURN_TO_EPILOG $vgpr0, $vgpr1

...

---
# V_SET_INACTIVE, when its second operand is undef, is replaced by a
# COPY by si-wqm. Ensure the instruction is removed.
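# (Note: the CHECK-NOT below only verifies that no V_SET_INACTIVE remains in the
# output; the value is expected to flow through the replacing COPY instead.)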
#CHECK-NOT: V_SET_INACTIVE
name:            no_cfg
alignment:       1
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
failedISel:      false
tracksRegLiveness: true
hasWinCFI:       false
registers:
  - { id: 0, class: sgpr_32, preferred-register: '' }
  - { id: 1, class: sgpr_32, preferred-register: '' }
  - { id: 2, class: sgpr_32, preferred-register: '' }
  - { id: 3, class: sgpr_32, preferred-register: '' }
  - { id: 4, class: sgpr_32, preferred-register: '' }
  - { id: 5, class: sgpr_128, preferred-register: '' }
  - { id: 6, class: sgpr_128, preferred-register: '' }
  - { id: 7, class: sreg_32, preferred-register: '' }
  - { id: 8, class: vreg_64, preferred-register: '' }
  - { id: 9, class: sreg_32, preferred-register: '' }
  - { id: 10, class: vgpr_32, preferred-register: '' }
  - { id: 11, class: vgpr_32, preferred-register: '' }
  - { id: 12, class: vgpr_32, preferred-register: '' }
  - { id: 13, class: vgpr_32, preferred-register: '' }
  - { id: 14, class: vgpr_32, preferred-register: '' }
  - { id: 15, class: vgpr_32, preferred-register: '' }
  - { id: 16, class: vgpr_32, preferred-register: '' }
liveins:
  - { reg: '$sgpr0', virtual-reg: '%0' }
  - { reg: '$sgpr1', virtual-reg: '%1' }
  - { reg: '$sgpr2', virtual-reg: '%2' }
  - { reg: '$sgpr3', virtual-reg: '%3' }
body:             |
  bb.0:
    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3

    %3:sgpr_32 = COPY $sgpr3
    %2:sgpr_32 = COPY $sgpr2
    %1:sgpr_32 = COPY $sgpr1
    %0:sgpr_32 = COPY $sgpr0
    %6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
    %5:sgpr_128 = COPY %6
    %7:sreg_32 = S_MOV_B32 0
    %8:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %6, %7, 0, 0, 0, implicit $exec
    %16:vgpr_32 = COPY %8.sub1
    %11:vgpr_32 = COPY %16
    %17:sreg_64_xexec = IMPLICIT_DEF
    %10:vgpr_32 = V_SET_INACTIVE_B32 0, %11, 0, undef %12, undef %17, implicit $exec, implicit-def $scc
    %14:vgpr_32 = COPY %7
    %13:vgpr_32 = V_MOV_B32_dpp %14, killed %10, 323, 12, 15, 0, implicit $exec
    early-clobber %15:vgpr_32 = STRICT_WWM killed %13, implicit $exec
    BUFFER_STORE_DWORD_OFFSET_exact killed %15, %6, %7, 4, 0, 0, implicit $exec
    S_ENDPGM 0

...

---
# Ensure that strict_wwm is not put around an EXEC copy
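# (Note: %7 in the checks below holds the $exec copy used by V_MBCNT; only the
# V_MOV_B32 that actually needs whole wave mode should be wrapped in
# ENTER_STRICT_WWM/EXIT_STRICT_WWM, with the $exec copy left outside.)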
#CHECK-LABEL: name: copy_exec
#CHECK: %7:sreg_64 = COPY $exec
#CHECK-NEXT: %13:sreg_64 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
#CHECK-NEXT: %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
#CHECK-NEXT: $exec = EXIT_STRICT_WWM %13
#CHECK-NEXT: %9:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %7.sub0, 0, implicit $exec
name:            copy_exec
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3

    %3:sgpr_32 = COPY $sgpr3
    %2:sgpr_32 = COPY $sgpr2
    %1:sgpr_32 = COPY $sgpr1
    %0:sgpr_32 = COPY $sgpr0
    %4:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
    %5:sreg_32 = S_MOV_B32 0
    %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %4, %5, 0, 0, 0, implicit $exec

    %8:sreg_64 = COPY $exec
    %9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %10:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %8.sub0:sreg_64, 0, implicit $exec
    %11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
    %12:sreg_32 = V_READLANE_B32 %11:vgpr_32, 63
    early-clobber %13:vgpr_32 = STRICT_WWM %9:vgpr_32, implicit $exec

    BUFFER_STORE_DWORD_OFFSET_exact killed %13, %4, %5, 4, 0, 0, implicit $exec
    S_ENDPGM 0

...

---
# Check that the exit of WQM is still inserted correctly when SCC is live until the block end.
# Critically, this tests that compilation does not fail.
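# (Note: SCC is defined by S_CMP_EQ_U32 and only read by S_CBRANCH_SCC0 at block end,
# so the checks below expect the WQM exit ($exec = S_AND_B64) to be bracketed by a
# copy of $scc to an SGPR and a restore of $scc afterwards.)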
#CHECK-LABEL: name: scc_always_live
#CHECK: %8:vreg_128 = IMAGE_SAMPLE_V4_V2 %7
#CHECK-NEXT: S_CMP_EQ_U32 %2, 0, implicit-def $scc
#CHECK-NEXT: undef %9.sub0:vreg_64 = nsz arcp nofpexcept V_ADD_F32_e64
#CHECK-NEXT: %9.sub1:vreg_64 = nsz arcp nofpexcept V_MUL_F32_e32
#CHECK-NEXT: %14:sreg_32_xm0 = COPY $scc
#CHECK-NEXT: $exec = S_AND_B64 $exec, %13, implicit-def $scc
#CHECK-NEXT: $scc = COPY %14
#CHECK-NEXT: %10:vgpr_32 = nsz arcp nofpexcept V_ADD_F32_e64
#CHECK-NEXT: %11:vreg_128 = IMAGE_SAMPLE_V4_V2
#CHECK-NEXT: S_CBRANCH_SCC0 %bb.2
name:            scc_always_live
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $sgpr1, $sgpr2, $vgpr1, $vgpr2

    $m0 = COPY $sgpr1
    %0:vgpr_32 = COPY $vgpr1
    %1:vgpr_32 = COPY $vgpr2
    %8:sgpr_32 = COPY $sgpr2
    %100:sgpr_256 = IMPLICIT_DEF
    %101:sgpr_128 = IMPLICIT_DEF

    %2:vgpr_32 = V_INTERP_P1_F32 %0:vgpr_32, 3, 2, implicit $mode, implicit $m0, implicit $exec
    %3:vgpr_32 = V_INTERP_P1_F32 %1:vgpr_32, 3, 2, implicit $mode, implicit $m0, implicit $exec

    undef %7.sub0:vreg_64 = COPY %2:vgpr_32
    %7.sub1:vreg_64 = COPY %3:vgpr_32

    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %7:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
    S_CMP_EQ_U32 %8:sgpr_32, 0, implicit-def $scc

    undef %5.sub0:vreg_64 = nsz arcp nofpexcept V_ADD_F32_e64 0, %4.sub0:vreg_128, 0, %3:vgpr_32, 1, 0, implicit $mode, implicit $exec
    %5.sub1:vreg_64 = nsz arcp nofpexcept V_MUL_F32_e32 %2, %3, implicit $mode, implicit $exec
    %6:vgpr_32 = nsz arcp nofpexcept V_ADD_F32_e64 0, %2:vgpr_32, 0, %3:vgpr_32, 1, 0, implicit $mode, implicit $exec

    %9:vreg_128 = IMAGE_SAMPLE_V4_V2 %5:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)

    S_CBRANCH_SCC0 %bb.2, implicit $scc

  bb.1:
    %10:sreg_32 = S_MOV_B32 0
    BUFFER_STORE_DWORD_OFFSET_exact %6:vgpr_32, %101:sgpr_128, %10:sreg_32, 4, 0, 0, implicit $exec
    S_ENDPGM 0

  bb.2:
    $vgpr0 = COPY %4.sub0:vreg_128
    $vgpr1 = COPY %4.sub1:vreg_128
    $vgpr2 = COPY %9.sub0:vreg_128
    $vgpr3 = COPY %9.sub1:vreg_128
    SI_RETURN_TO_EPILOG $vgpr0, $vgpr1, $vgpr2, $vgpr3
...

---
# Check that unnecessary instructions do not get marked for WWM
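# (Note: only the two V_SET_INACTIVE_B32 instructions need strict WWM; the buffer
# load before them and the V_MAX after them must stay outside the WWM region, as the
# CHECK-NOT lines below enforce.)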
#
#CHECK-NOT: ENTER_STRICT_WWM
#CHECK: BUFFER_LOAD_DWORDX2
#CHECK: ENTER_STRICT_WWM
#CHECK: V_SET_INACTIVE_B32
#CHECK: V_SET_INACTIVE_B32
#CHECK-NOT: ENTER_STRICT_WWM
#CHECK: V_MAX
name:            test_wwm_set_inactive_propagation
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
    %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:vgpr_32 = COPY $vgpr0
    %2:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN %1:vgpr_32, %0:sgpr_128, 0, 0, 0, 0, implicit $exec
    %4:sreg_64_xexec = IMPLICIT_DEF
    %2.sub0:vreg_64 = V_SET_INACTIVE_B32 0, %2.sub0:vreg_64, 0, 0, undef %4, implicit $exec, implicit-def $scc
    %2.sub1:vreg_64 = V_SET_INACTIVE_B32 0, %2.sub1:vreg_64, 0, 0, undef %4, implicit $exec, implicit-def $scc
    %3:vreg_64 = nnan nsz arcp contract reassoc nofpexcept V_MAX_F64_e64 0, %2:vreg_64, 0, %2:vreg_64, 0, 0, implicit $mode, implicit $exec
    $vgpr0 = STRICT_WWM %3.sub0:vreg_64, implicit $exec
    $vgpr1 = STRICT_WWM %3.sub1:vreg_64, implicit $exec
    SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
...

---
# Check that WQM marking occurs correctly through phi nodes in the live range graph.
# If not, the initial V_MOV will not be in WQM.
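# (Note: the WQM requirement originates at the IMAGE_SAMPLE in bb.4; its %0 operand
# is assembled from subregister defs in several predecessor blocks, so marking must
# propagate backwards across those blocks to reach the V_MOVs in bb.0.)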
#
#CHECK-LABEL: name: test_wqm_lr_phi
#CHECK: COPY $exec
#CHECK-NEXT: S_WQM
#CHECK-NEXT: V_MOV_B32_e32 -10
#CHECK-NEXT: V_MOV_B32_e32 0
name:            test_wqm_lr_phi
tracksRegLiveness: true
body:             |
  bb.0:
    undef %0.sub0:vreg_64 = V_MOV_B32_e32 -10, implicit $exec
    %0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
    %1:sreg_64 = S_GETPC_B64
    %2:sgpr_256 = S_LOAD_DWORDX8_IMM %1:sreg_64, 32, 0

  bb.1:
    $vcc = V_CMP_LT_U32_e64 4, 4, implicit $exec
    S_CBRANCH_VCCNZ %bb.3, implicit $vcc
    S_BRANCH %bb.2

  bb.2:
    %0.sub0:vreg_64 = V_ADD_U32_e32 1, %0.sub1, implicit $exec
    S_BRANCH %bb.3

  bb.3:
    %0.sub1:vreg_64 = V_ADD_U32_e32 1, %0.sub1, implicit $exec
    S_BRANCH %bb.4

  bb.4:
    %3:sgpr_128 = IMPLICIT_DEF
    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %2:sgpr_256, %3:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
    $vgpr0 = COPY %4.sub0:vreg_128
    $vgpr1 = COPY %4.sub1:vreg_128
    SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
...

---
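# The remaining tests use non-pixel-shader calling conventions (cs/es/gs/hs/ls/vs).
# (Assumption: si-wqm only requires whole quad mode for amdgpu_ps entry points, so
# an IMAGE_SAMPLE alone must not introduce S_WQM in any of these functions.)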
#CHECK-LABEL: name: no_wqm_in_cs
#CHECK-NOT: S_WQM
name:            no_wqm_in_cs
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr1, $vgpr2

    undef %0.sub0:vreg_64 = COPY $vgpr1
    %0.sub1:vreg_64 = COPY $vgpr2
    %100:sgpr_256 = IMPLICIT_DEF
    %101:sgpr_128 = IMPLICIT_DEF

    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
...

---
#CHECK-LABEL: name: no_wqm_in_es
#CHECK-NOT: S_WQM
name:            no_wqm_in_es
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr1, $vgpr2

    undef %0.sub0:vreg_64 = COPY $vgpr1
    %0.sub1:vreg_64 = COPY $vgpr2
    %100:sgpr_256 = IMPLICIT_DEF
    %101:sgpr_128 = IMPLICIT_DEF

    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
...

---
#CHECK-LABEL: name: no_wqm_in_gs
#CHECK-NOT: S_WQM
name:            no_wqm_in_gs
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr1, $vgpr2

    undef %0.sub0:vreg_64 = COPY $vgpr1
    %0.sub1:vreg_64 = COPY $vgpr2
    %100:sgpr_256 = IMPLICIT_DEF
    %101:sgpr_128 = IMPLICIT_DEF

    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
...

---
#CHECK-LABEL: name: no_wqm_in_hs
#CHECK-NOT: S_WQM
name:            no_wqm_in_hs
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr1, $vgpr2

    undef %0.sub0:vreg_64 = COPY $vgpr1
    %0.sub1:vreg_64 = COPY $vgpr2
    %100:sgpr_256 = IMPLICIT_DEF
    %101:sgpr_128 = IMPLICIT_DEF

    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
...

---
#CHECK-LABEL: name: no_wqm_in_ls
#CHECK-NOT: S_WQM
name:            no_wqm_in_ls
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr1, $vgpr2

    undef %0.sub0:vreg_64 = COPY $vgpr1
    %0.sub1:vreg_64 = COPY $vgpr2
    %100:sgpr_256 = IMPLICIT_DEF
    %101:sgpr_128 = IMPLICIT_DEF

    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
...

---
#CHECK-LABEL: name: no_wqm_in_vs
#CHECK-NOT: S_WQM
name:            no_wqm_in_vs
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr1, $vgpr2

    undef %0.sub0:vreg_64 = COPY $vgpr1
    %0.sub1:vreg_64 = COPY $vgpr2
    %100:sgpr_256 = IMPLICIT_DEF
    %101:sgpr_128 = IMPLICIT_DEF

    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
...