xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2# RUN: llc -mtriple=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
3# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
4# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
5# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s
6# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX12 %s
7
8---
9
# Input: a seq_cst atomic G_LOAD of s32 through a p0 pointer (addrspace 0).
# Expected for all five check prefixes (GFX7/9/10/11/12): the load is selected
# to FLAT_LOAD_DWORD addressed directly by the copied $vgpr0_vgpr1 pair.
10name: load_atomic_flat_s32_seq_cst
11legalized:       true
12regBankSelected: true
13tracksRegLiveness: true
14
15body: |
16  bb.0:
17    liveins:  $vgpr0_vgpr1
18
19    ; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst
20    ; GFX7: liveins: $vgpr0_vgpr1
21    ; GFX7-NEXT: {{  $}}
22    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
23    ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
24    ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
25    ;
26    ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst
27    ; GFX9: liveins: $vgpr0_vgpr1
28    ; GFX9-NEXT: {{  $}}
29    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
30    ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
31    ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
32    ;
33    ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst
34    ; GFX10: liveins: $vgpr0_vgpr1
35    ; GFX10-NEXT: {{  $}}
36    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
37    ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
38    ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
39    ;
40    ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst
41    ; GFX11: liveins: $vgpr0_vgpr1
42    ; GFX11-NEXT: {{  $}}
43    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
44    ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
45    ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
46    ;
47    ; GFX12-LABEL: name: load_atomic_flat_s32_seq_cst
48    ; GFX12: liveins: $vgpr0_vgpr1
49    ; GFX12-NEXT: {{  $}}
50    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
51    ; GFX12-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
52    ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
53    %0:vgpr(p0) = COPY $vgpr0_vgpr1
54    %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 0)
55    $vgpr0 = COPY %1
56
57...
58
59---
60
# Input: a seq_cst atomic G_LOAD of <2 x s16> through a p0 pointer.
# The recorded output for every check prefix still contains the generic G_LOAD
# (its result only gains the vgpr_32 register class), so no FLAT instruction is
# expected for this type.
# NOTE(review): presumably this is a selection failure that llc tolerates
# because the RUN lines pass -global-isel-abort=0 -- confirm.
61name: load_atomic_flat_v2s16_seq_cst
62legalized:       true
63regBankSelected: true
64tracksRegLiveness: true
65
66body: |
67  bb.0:
68    liveins:  $vgpr0_vgpr1
69
70    ; GFX7-LABEL: name: load_atomic_flat_v2s16_seq_cst
71    ; GFX7: liveins: $vgpr0_vgpr1
72    ; GFX7-NEXT: {{  $}}
73    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
74    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>))
75    ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
76    ;
77    ; GFX9-LABEL: name: load_atomic_flat_v2s16_seq_cst
78    ; GFX9: liveins: $vgpr0_vgpr1
79    ; GFX9-NEXT: {{  $}}
80    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
81    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>))
82    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
83    ;
84    ; GFX10-LABEL: name: load_atomic_flat_v2s16_seq_cst
85    ; GFX10: liveins: $vgpr0_vgpr1
86    ; GFX10-NEXT: {{  $}}
87    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
88    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>))
89    ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
90    ;
91    ; GFX11-LABEL: name: load_atomic_flat_v2s16_seq_cst
92    ; GFX11: liveins: $vgpr0_vgpr1
93    ; GFX11-NEXT: {{  $}}
94    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
95    ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>))
96    ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
97    ;
98    ; GFX12-LABEL: name: load_atomic_flat_v2s16_seq_cst
99    ; GFX12: liveins: $vgpr0_vgpr1
100    ; GFX12-NEXT: {{  $}}
101    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
102    ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>))
103    ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
104    %0:vgpr(p0) = COPY $vgpr0_vgpr1
105    %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 0)
106    $vgpr0 = COPY %1
107
108...
109
110---
111
# Input: a seq_cst atomic G_LOAD producing a p3 pointer value, read through a
# p0 pointer.
# The recorded output for every check prefix keeps the generic G_LOAD (result
# constrained to vgpr_32); no FLAT instruction is expected for a p3 result.
# NOTE(review): presumably a selection failure tolerated via
# -global-isel-abort=0 on the RUN lines -- confirm.
112name: load_atomic_flat_p3_seq_cst
113legalized:       true
114regBankSelected: true
115tracksRegLiveness: true
116
117body: |
118  bb.0:
119    liveins:  $vgpr0_vgpr1
120
121    ; GFX7-LABEL: name: load_atomic_flat_p3_seq_cst
122    ; GFX7: liveins: $vgpr0_vgpr1
123    ; GFX7-NEXT: {{  $}}
124    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
125    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3))
126    ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](p3)
127    ;
128    ; GFX9-LABEL: name: load_atomic_flat_p3_seq_cst
129    ; GFX9: liveins: $vgpr0_vgpr1
130    ; GFX9-NEXT: {{  $}}
131    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
132    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3))
133    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3)
134    ;
135    ; GFX10-LABEL: name: load_atomic_flat_p3_seq_cst
136    ; GFX10: liveins: $vgpr0_vgpr1
137    ; GFX10-NEXT: {{  $}}
138    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
139    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3))
140    ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p3)
141    ;
142    ; GFX11-LABEL: name: load_atomic_flat_p3_seq_cst
143    ; GFX11: liveins: $vgpr0_vgpr1
144    ; GFX11-NEXT: {{  $}}
145    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
146    ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3))
147    ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p3)
148    ;
149    ; GFX12-LABEL: name: load_atomic_flat_p3_seq_cst
150    ; GFX12: liveins: $vgpr0_vgpr1
151    ; GFX12-NEXT: {{  $}}
152    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
153    ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3))
154    ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p3)
155    %0:vgpr(p0) = COPY $vgpr0_vgpr1
156    %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 0)
157    $vgpr0 = COPY %1
158
159...
160
161---
162
# Input: a seq_cst atomic G_LOAD of s64 (align 8) through a p0 pointer.
# Expected for all five check prefixes: the load is selected to
# FLAT_LOAD_DWORDX2 with a vreg_64 result that is copied to $vgpr0_vgpr1.
163name: load_atomic_flat_s64_seq_cst
164legalized:       true
165regBankSelected: true
166tracksRegLiveness: true
167
168body: |
169  bb.0:
170    liveins:  $vgpr0_vgpr1
171
172    ; GFX7-LABEL: name: load_atomic_flat_s64_seq_cst
173    ; GFX7: liveins: $vgpr0_vgpr1
174    ; GFX7-NEXT: {{  $}}
175    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
176    ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64))
177    ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
178    ;
179    ; GFX9-LABEL: name: load_atomic_flat_s64_seq_cst
180    ; GFX9: liveins: $vgpr0_vgpr1
181    ; GFX9-NEXT: {{  $}}
182    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
183    ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64))
184    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
185    ;
186    ; GFX10-LABEL: name: load_atomic_flat_s64_seq_cst
187    ; GFX10: liveins: $vgpr0_vgpr1
188    ; GFX10-NEXT: {{  $}}
189    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
190    ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64))
191    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
192    ;
193    ; GFX11-LABEL: name: load_atomic_flat_s64_seq_cst
194    ; GFX11: liveins: $vgpr0_vgpr1
195    ; GFX11-NEXT: {{  $}}
196    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
197    ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64))
198    ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
199    ;
200    ; GFX12-LABEL: name: load_atomic_flat_s64_seq_cst
201    ; GFX12: liveins: $vgpr0_vgpr1
202    ; GFX12-NEXT: {{  $}}
203    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
204    ; GFX12-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64))
205    ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
206    %0:vgpr(p0) = COPY $vgpr0_vgpr1
207    %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 0)
208    $vgpr0_vgpr1 = COPY %1
209
210...
211
212---
213
# Input: a seq_cst atomic G_LOAD of <2 x s32> (align 8) through a p0 pointer.
# The recorded output for every check prefix keeps the generic G_LOAD (result
# constrained to vreg_64); no FLAT instruction is expected for this type.
# NOTE(review): presumably a selection failure tolerated via
# -global-isel-abort=0 on the RUN lines -- confirm.
214name: load_atomic_flat_v2s32_seq_cst
215legalized:       true
216regBankSelected: true
217tracksRegLiveness: true
218
219body: |
220  bb.0:
221    liveins:  $vgpr0_vgpr1
222
223    ; GFX7-LABEL: name: load_atomic_flat_v2s32_seq_cst
224    ; GFX7: liveins: $vgpr0_vgpr1
225    ; GFX7-NEXT: {{  $}}
226    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
227    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
228    ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
229    ;
230    ; GFX9-LABEL: name: load_atomic_flat_v2s32_seq_cst
231    ; GFX9: liveins: $vgpr0_vgpr1
232    ; GFX9-NEXT: {{  $}}
233    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
234    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
235    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
236    ;
237    ; GFX10-LABEL: name: load_atomic_flat_v2s32_seq_cst
238    ; GFX10: liveins: $vgpr0_vgpr1
239    ; GFX10-NEXT: {{  $}}
240    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
241    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
242    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
243    ;
244    ; GFX11-LABEL: name: load_atomic_flat_v2s32_seq_cst
245    ; GFX11: liveins: $vgpr0_vgpr1
246    ; GFX11-NEXT: {{  $}}
247    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
248    ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
249    ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
250    ;
251    ; GFX12-LABEL: name: load_atomic_flat_v2s32_seq_cst
252    ; GFX12: liveins: $vgpr0_vgpr1
253    ; GFX12-NEXT: {{  $}}
254    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
255    ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
256    ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
257    %0:vgpr(p0) = COPY $vgpr0_vgpr1
258    %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 0)
259    $vgpr0_vgpr1 = COPY %1
260
261...
262
263---
264
# Input: a seq_cst atomic G_LOAD of <4 x s16> (align 8) through a p0 pointer.
# The recorded output for every check prefix keeps the generic G_LOAD (result
# constrained to vreg_64); no FLAT instruction is expected for this type.
# NOTE(review): presumably a selection failure tolerated via
# -global-isel-abort=0 on the RUN lines -- confirm.
265name: load_atomic_flat_v4s16_seq_cst
266legalized:       true
267regBankSelected: true
268tracksRegLiveness: true
269
270body: |
271  bb.0:
272    liveins:  $vgpr0_vgpr1
273
274    ; GFX7-LABEL: name: load_atomic_flat_v4s16_seq_cst
275    ; GFX7: liveins: $vgpr0_vgpr1
276    ; GFX7-NEXT: {{  $}}
277    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
278    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>))
279    ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
280    ;
281    ; GFX9-LABEL: name: load_atomic_flat_v4s16_seq_cst
282    ; GFX9: liveins: $vgpr0_vgpr1
283    ; GFX9-NEXT: {{  $}}
284    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
285    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>))
286    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
287    ;
288    ; GFX10-LABEL: name: load_atomic_flat_v4s16_seq_cst
289    ; GFX10: liveins: $vgpr0_vgpr1
290    ; GFX10-NEXT: {{  $}}
291    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
292    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>))
293    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
294    ;
295    ; GFX11-LABEL: name: load_atomic_flat_v4s16_seq_cst
296    ; GFX11: liveins: $vgpr0_vgpr1
297    ; GFX11-NEXT: {{  $}}
298    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
299    ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>))
300    ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
301    ;
302    ; GFX12-LABEL: name: load_atomic_flat_v4s16_seq_cst
303    ; GFX12: liveins: $vgpr0_vgpr1
304    ; GFX12-NEXT: {{  $}}
305    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
306    ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>))
307    ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
308    %0:vgpr(p0) = COPY $vgpr0_vgpr1
309    %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 0)
310    $vgpr0_vgpr1 = COPY %1
311
312...
313
314---
315
# Input: a seq_cst atomic G_LOAD producing a p1 pointer value (align 8), read
# through a p0 pointer.
# The recorded output for every check prefix keeps the generic G_LOAD (result
# constrained to vreg_64); no FLAT instruction is expected for a p1 result.
# NOTE(review): presumably a selection failure tolerated via
# -global-isel-abort=0 on the RUN lines -- confirm.
316name: load_atomic_flat_p1_seq_cst
317legalized:       true
318regBankSelected: true
319tracksRegLiveness: true
320
321body: |
322  bb.0:
323    liveins:  $vgpr0_vgpr1
324
325    ; GFX7-LABEL: name: load_atomic_flat_p1_seq_cst
326    ; GFX7: liveins: $vgpr0_vgpr1
327    ; GFX7-NEXT: {{  $}}
328    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
329    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1))
330    ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
331    ;
332    ; GFX9-LABEL: name: load_atomic_flat_p1_seq_cst
333    ; GFX9: liveins: $vgpr0_vgpr1
334    ; GFX9-NEXT: {{  $}}
335    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
336    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1))
337    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
338    ;
339    ; GFX10-LABEL: name: load_atomic_flat_p1_seq_cst
340    ; GFX10: liveins: $vgpr0_vgpr1
341    ; GFX10-NEXT: {{  $}}
342    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
343    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1))
344    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
345    ;
346    ; GFX11-LABEL: name: load_atomic_flat_p1_seq_cst
347    ; GFX11: liveins: $vgpr0_vgpr1
348    ; GFX11-NEXT: {{  $}}
349    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
350    ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1))
351    ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
352    ;
353    ; GFX12-LABEL: name: load_atomic_flat_p1_seq_cst
354    ; GFX12: liveins: $vgpr0_vgpr1
355    ; GFX12-NEXT: {{  $}}
356    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
357    ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1))
358    ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
359    %0:vgpr(p0) = COPY $vgpr0_vgpr1
360    %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 0)
361    $vgpr0_vgpr1 = COPY %1
362
363...
364
365---
366
# Input: a seq_cst atomic G_LOAD producing a p0 pointer value (align 8), read
# through a p0 pointer.
# The recorded output for every check prefix keeps the generic G_LOAD (result
# constrained to vreg_64); no FLAT instruction is expected for a p0 result.
# NOTE(review): presumably a selection failure tolerated via
# -global-isel-abort=0 on the RUN lines -- confirm.
367name: load_atomic_flat_p0_seq_cst
368legalized:       true
369regBankSelected: true
370tracksRegLiveness: true
371
372body: |
373  bb.0:
374    liveins:  $vgpr0_vgpr1
375
376    ; GFX7-LABEL: name: load_atomic_flat_p0_seq_cst
377    ; GFX7: liveins: $vgpr0_vgpr1
378    ; GFX7-NEXT: {{  $}}
379    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
380    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0))
381    ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
382    ;
383    ; GFX9-LABEL: name: load_atomic_flat_p0_seq_cst
384    ; GFX9: liveins: $vgpr0_vgpr1
385    ; GFX9-NEXT: {{  $}}
386    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
387    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0))
388    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
389    ;
390    ; GFX10-LABEL: name: load_atomic_flat_p0_seq_cst
391    ; GFX10: liveins: $vgpr0_vgpr1
392    ; GFX10-NEXT: {{  $}}
393    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
394    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0))
395    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
396    ;
397    ; GFX11-LABEL: name: load_atomic_flat_p0_seq_cst
398    ; GFX11: liveins: $vgpr0_vgpr1
399    ; GFX11-NEXT: {{  $}}
400    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
401    ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0))
402    ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
403    ;
404    ; GFX12-LABEL: name: load_atomic_flat_p0_seq_cst
405    ; GFX12: liveins: $vgpr0_vgpr1
406    ; GFX12-NEXT: {{  $}}
407    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
408    ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0))
409    ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
410    %0:vgpr(p0) = COPY $vgpr0_vgpr1
411    %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 0)
412    $vgpr0_vgpr1 = COPY %1
413
414...
415
416---
417
# Input: a seq_cst atomic s32 G_LOAD whose address is a p0 base plus
# G_CONSTANT -2048 (via G_PTR_ADD).
# GFX7/GFX9/GFX10/GFX11 checks: the constant is materialized with
# V_MOV_B64_PSEUDO and added to the base by a V_ADD_CO_U32_e64 /
# V_ADDC_U32_e64 pair; FLAT_LOAD_DWORD then reads through the REG_SEQUENCE
# result with a 0 immediate.
# GFX12 checks: -2048 appears as the immediate operand of FLAT_LOAD_DWORD on
# the original base and no add sequence is emitted.
# The carry register class in the checks is sreg_64_xexec for GFX7/GFX9 and
# sreg_32_xm0_xexec for GFX10/GFX11.
418name: load_atomic_flat_s32_seq_cst_gep_m2048
419legalized:       true
420regBankSelected: true
421tracksRegLiveness: true
422
423body: |
424  bb.0:
425    liveins:  $vgpr0_vgpr1
426
427    ; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
428    ; GFX7: liveins: $vgpr0_vgpr1
429    ; GFX7-NEXT: {{  $}}
430    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
431    ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
432    ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
433    ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
434    ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
435    ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
436    ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
437    ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
438    ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
439    ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
440    ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
441    ;
442    ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
443    ; GFX9: liveins: $vgpr0_vgpr1
444    ; GFX9-NEXT: {{  $}}
445    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
446    ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
447    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
448    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
449    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
450    ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
451    ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
452    ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
453    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
454    ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
455    ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
456    ;
457    ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
458    ; GFX10: liveins: $vgpr0_vgpr1
459    ; GFX10-NEXT: {{  $}}
460    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
461    ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
462    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
463    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
464    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
465    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
466    ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
467    ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
468    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
469    ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
470    ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
471    ;
472    ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
473    ; GFX11: liveins: $vgpr0_vgpr1
474    ; GFX11-NEXT: {{  $}}
475    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
476    ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
477    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
478    ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
479    ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
480    ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
481    ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
482    ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
483    ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
484    ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
485    ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
486    ;
487    ; GFX12-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
488    ; GFX12: liveins: $vgpr0_vgpr1
489    ; GFX12-NEXT: {{  $}}
490    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
491    ; GFX12-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
492    ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
493    %0:vgpr(p0) = COPY $vgpr0_vgpr1
494    %1:vgpr(s64) = G_CONSTANT i64 -2048
495    %2:vgpr(p0) = G_PTR_ADD %0, %1
496    %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 0)
497    $vgpr0 = COPY %3
498
499...
500
501---
502
# Input: a seq_cst atomic s32 G_LOAD whose address is a p0 base plus
# G_CONSTANT 4095 (via G_PTR_ADD).
# GFX9/GFX11/GFX12 checks: 4095 appears as the immediate operand of
# FLAT_LOAD_DWORD on the original base and no add sequence is emitted.
# GFX7/GFX10 checks: the constant is materialized with V_MOV_B64_PSEUDO and
# added to the base by a V_ADD_CO_U32_e64 / V_ADDC_U32_e64 pair; the load then
# uses the REG_SEQUENCE result with a 0 immediate.
503name: load_atomic_flat_s32_seq_cst_gep_4095
504legalized:       true
505regBankSelected: true
506tracksRegLiveness: true
507
508body: |
509  bb.0:
510    liveins:  $vgpr0_vgpr1
511
512    ; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
513    ; GFX7: liveins: $vgpr0_vgpr1
514    ; GFX7-NEXT: {{  $}}
515    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
516    ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
517    ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
518    ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
519    ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
520    ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
521    ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
522    ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
523    ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
524    ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
525    ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
526    ;
527    ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
528    ; GFX9: liveins: $vgpr0_vgpr1
529    ; GFX9-NEXT: {{  $}}
530    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
531    ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
532    ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
533    ;
534    ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
535    ; GFX10: liveins: $vgpr0_vgpr1
536    ; GFX10-NEXT: {{  $}}
537    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
538    ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
539    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
540    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
541    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
542    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
543    ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
544    ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
545    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
546    ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
547    ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
548    ;
549    ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
550    ; GFX11: liveins: $vgpr0_vgpr1
551    ; GFX11-NEXT: {{  $}}
552    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
553    ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
554    ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
555    ;
556    ; GFX12-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
557    ; GFX12: liveins: $vgpr0_vgpr1
558    ; GFX12-NEXT: {{  $}}
559    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
560    ; GFX12-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
561    ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
562    %0:vgpr(p0) = COPY $vgpr0_vgpr1
563    %1:vgpr(s64) = G_CONSTANT i64 4095
564    %2:vgpr(p0) = G_PTR_ADD %0, %1
565    %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 0)
566    $vgpr0 = COPY %3
567
568...
569
570