xref: /llvm-project/llvm/test/CodeGen/AMDGPU/spill-wait.mir (revision 4f90e75bdc156d2630da525eb74d00611753c706)
1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX9 %s
3# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX12 %s
4
5# There shall be no S_WAITCNT between the two stores.
6
7---
8name: spill_vgpr_tuple
9
10body: |
11  bb.0:
12    liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79
13
14    ; GFX9-LABEL: name: spill_vgpr_tuple
15    ; GFX9: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79
16    ; GFX9-NEXT: {{  $}}
17    ; GFX9-NEXT: S_WAITCNT 0
18    ; GFX9-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec
19    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65
20    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65
21    ; GFX9-NEXT: S_ENDPGM 0
22    ;
23    ; GFX12-LABEL: name: spill_vgpr_tuple
24    ; GFX12: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79
25    ; GFX12-NEXT: {{  $}}
26    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
27    ; GFX12-NEXT: S_WAIT_EXPCNT 0
28    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
29    ; GFX12-NEXT: S_WAIT_BVHCNT 0
30    ; GFX12-NEXT: S_WAIT_KMCNT 0
31    ; GFX12-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec
32    ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65
33    ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65
34    ; GFX12-NEXT: S_ENDPGM 0
    ; Test input: copy $vgpr0_vgpr1 into the $vgpr64_vgpr65 tuple, then store
    ; its two halves with separate dword stores. The first store carries both
    ; an implicit-def and an implicit use of the whole tuple; the second store
    ; reads $vgpr65 and implicitly uses the tuple. For both run lines the
    ; expected output keeps the two stores back to back, with no wait
    ; instruction between them.
35    $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec
36    BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65
37    BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65
38    S_ENDPGM 0
39...
40
41# Make sure that, while ignoring implicit operands, we do not ignore the implicit $vcc on VALU instructions.
42
43---
44name: load_vcc_wait
45
46body: |
47  bb.0:
48    liveins: $vgpr0, $sgpr10_sgpr11
49
50    ; GFX9-LABEL: name: load_vcc_wait
51    ; GFX9: liveins: $vgpr0, $sgpr10_sgpr11
52    ; GFX9-NEXT: {{  $}}
53    ; GFX9-NEXT: S_WAITCNT 0
54    ; GFX9-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
55    ; GFX9-NEXT: S_WAITCNT 49279
56    ; GFX9-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
57    ; GFX9-NEXT: S_ENDPGM 0
58    ;
59    ; GFX12-LABEL: name: load_vcc_wait
60    ; GFX12: liveins: $vgpr0, $sgpr10_sgpr11
61    ; GFX12-NEXT: {{  $}}
62    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
63    ; GFX12-NEXT: S_WAIT_EXPCNT 0
64    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
65    ; GFX12-NEXT: S_WAIT_BVHCNT 0
66    ; GFX12-NEXT: S_WAIT_KMCNT 0
67    ; GFX12-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
68    ; GFX12-NEXT: S_WAIT_KMCNT 0
69    ; GFX12-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
70    ; GFX12-NEXT: S_ENDPGM 0
    ; Test input: a scalar load writes $vcc_lo, and the V_ADDC_U32_e32 that
    ; follows names $vcc only through implicit operands. The expected output
    ; has a wait between the two instructions: S_WAITCNT 49279 for the gfx90a
    ; run and S_WAIT_KMCNT 0 for the gfx1200 run.
71    $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
72    $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
73    S_ENDPGM 0
74...
75
76# Make sure that, while ignoring implicit operands, we do not ignore the implicit $flat_scr on FLAT instructions.
77
78---
79name: load_flat_scr_lo_flat_load_wait
80
81body: |
82  bb.0:
83    liveins: $sgpr10_sgpr11, $vgpr0_vgpr1
84
85    ; GFX9-LABEL: name: load_flat_scr_lo_flat_load_wait
86    ; GFX9: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1
87    ; GFX9-NEXT: {{  $}}
88    ; GFX9-NEXT: S_WAITCNT 0
89    ; GFX9-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
90    ; GFX9-NEXT: S_WAITCNT 49279
91    ; GFX9-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
92    ; GFX9-NEXT: S_ENDPGM 0
93    ;
94    ; GFX12-LABEL: name: load_flat_scr_lo_flat_load_wait
95    ; GFX12: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1
96    ; GFX12-NEXT: {{  $}}
97    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
98    ; GFX12-NEXT: S_WAIT_EXPCNT 0
99    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
100    ; GFX12-NEXT: S_WAIT_BVHCNT 0
101    ; GFX12-NEXT: S_WAIT_KMCNT 0
102    ; GFX12-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
103    ; GFX12-NEXT: S_WAIT_KMCNT 0
104    ; GFX12-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
105    ; GFX12-NEXT: S_ENDPGM 0
    ; Test input: a scalar load writes $flat_scr_lo, and the FLAT_LOAD_DWORD
    ; that follows reads $flat_scr only as an implicit operand. The expected
    ; output has a wait between the two instructions: S_WAITCNT 49279 for the
    ; gfx90a run and S_WAIT_KMCNT 0 for the gfx1200 run.
106    $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
107    $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
108    S_ENDPGM 0
109...
110
111---
112name: load_flat_scr_lo_scratch_store_wait
113
114body: |
115  bb.0:
116    liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32
117
118    ; GFX9-LABEL: name: load_flat_scr_lo_scratch_store_wait
119    ; GFX9: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32
120    ; GFX9-NEXT: {{  $}}
121    ; GFX9-NEXT: S_WAITCNT 0
122    ; GFX9-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
123    ; GFX9-NEXT: S_WAITCNT 49279
124    ; GFX9-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr
125    ; GFX9-NEXT: S_ENDPGM 0
126    ;
127    ; GFX12-LABEL: name: load_flat_scr_lo_scratch_store_wait
128    ; GFX12: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32
129    ; GFX12-NEXT: {{  $}}
130    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
131    ; GFX12-NEXT: S_WAIT_EXPCNT 0
132    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
133    ; GFX12-NEXT: S_WAIT_BVHCNT 0
134    ; GFX12-NEXT: S_WAIT_KMCNT 0
135    ; GFX12-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
136    ; GFX12-NEXT: S_WAIT_KMCNT 0
137    ; GFX12-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr
138    ; GFX12-NEXT: S_ENDPGM 0
    ; Test input: a scalar load writes $flat_scr_hi, and the
    ; SCRATCH_STORE_DWORD_SADDR that follows reads $flat_scr only as an
    ; implicit operand. The expected output has a wait between the two
    ; instructions: S_WAITCNT 49279 for the gfx90a run and S_WAIT_KMCNT 0 for
    ; the gfx1200 run.
    ; NOTE(review): the function name says flat_scr_lo, but the load defines
    ; $flat_scr_hi -- confirm which half is intended.
139    $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
140    SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr
141    S_ENDPGM 0
142...
143
144# Check that implicit spill defs do not force wait to zero on the first store
145
146---
147name: spill_load_store
148
149body: |
150  bb.0:
151    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
152
153    ; GFX9-LABEL: name: spill_load_store
154    ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
155    ; GFX9-NEXT: {{  $}}
156    ; GFX9-NEXT: S_WAITCNT 0
157    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
158    ; GFX9-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec
159    ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec
160    ; GFX9-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
161    ; GFX9-NEXT: S_WAITCNT 3955
162    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
163    ; GFX9-NEXT: S_WAITCNT 3955
164    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec
165    ; GFX9-NEXT: S_WAITCNT 3955
166    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec
167    ; GFX9-NEXT: S_WAITCNT 3955
168    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
169    ; GFX9-NEXT: S_ENDPGM 0
170    ;
171    ; GFX12-LABEL: name: spill_load_store
172    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
173    ; GFX12-NEXT: {{  $}}
174    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
175    ; GFX12-NEXT: S_WAIT_EXPCNT 0
176    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
177    ; GFX12-NEXT: S_WAIT_BVHCNT 0
178    ; GFX12-NEXT: S_WAIT_KMCNT 0
179    ; GFX12-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
180    ; GFX12-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec
181    ; GFX12-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec
182    ; GFX12-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
183    ; GFX12-NEXT: S_WAIT_LOADCNT 3
184    ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
185    ; GFX12-NEXT: S_WAIT_LOADCNT 2
186    ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec
187    ; GFX12-NEXT: S_WAIT_LOADCNT 1
188    ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec
189    ; GFX12-NEXT: S_WAIT_LOADCNT 0
190    ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
191    ; GFX12-NEXT: S_ENDPGM 0
    ; Test input: four dword loads fill $vgpr0..$vgpr3 one register at a time
    ; (the first and last load also carry an implicit-def of the whole
    ; $vgpr0_vgpr1_vgpr2_vgpr3 tuple), then four dword stores write the same
    ; registers back out in the same order. The first store carries an
    ; implicit-def and an implicit use of the tuple. The expected output puts
    ; a wait before every store: S_WAITCNT 3955 each time for the gfx90a run,
    ; and S_WAIT_LOADCNT counting down 3, 2, 1, 0 for the gfx1200 run. In
    ; particular the wait before the first store is not a wait to zero.
192    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
193    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec
194    $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec
195    $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
196    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
197    BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec
198    BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec
199    BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
200    S_ENDPGM 0
201...
202
203# Make sure we have a wait to mitigate the WAW on $vgpr2 on gfx12
204
205---
206name: scratch_load_waw
207body:             |
208  bb.0.entry:
209    liveins: $vgpr0, $sgpr0
210
211    ; GFX9-LABEL: name: scratch_load_waw
212    ; GFX9: liveins: $vgpr0, $sgpr0
213    ; GFX9-NEXT: {{  $}}
214    ; GFX9-NEXT: S_WAITCNT 0
215    ; GFX9-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
216    ; GFX9-NEXT: $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr
217    ; GFX9-NEXT: S_ENDPGM 0
218    ;
219    ; GFX12-LABEL: name: scratch_load_waw
220    ; GFX12: liveins: $vgpr0, $sgpr0
221    ; GFX12-NEXT: {{  $}}
222    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
223    ; GFX12-NEXT: S_WAIT_EXPCNT 0
224    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
225    ; GFX12-NEXT: S_WAIT_BVHCNT 0
226    ; GFX12-NEXT: S_WAIT_KMCNT 0
227    ; GFX12-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
228    ; GFX12-NEXT: S_WAIT_LOADCNT 0
229    ; GFX12-NEXT: $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr
230    ; GFX12-NEXT: S_ENDPGM 0
    ; Test input: two scratch loads both write $vgpr2; the second one also
    ; takes $vgpr2 as an input operand. For the gfx1200 run the expected
    ; output has S_WAIT_LOADCNT 0 between the two loads; for the gfx90a run
    ; the two loads stay back to back with no wait between them.
231    $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
232    $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr
233    S_ENDPGM 0
234...
235