Lines Matching +full:0 +full:xc

18 ; GFX6:       ; %bb.0: ; %entry
20 ; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
21 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
22 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
25 ; GFX6-NEXT: s_mov_b32 s12, 0x100f000
37 ; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0
38 ; GFX6-NEXT: s_waitcnt vmcnt(0)
39 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
43 ; GFX7: ; %bb.0: ; %entry
44 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
45 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2
46 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
49 ; GFX7-NEXT: flat_load_dword v2, v[0:1]
52 ; GFX7-NEXT: s_waitcnt vmcnt(0)
53 ; GFX7-NEXT: flat_store_dword v[0:1], v2
57 ; GFX10-WGP: ; %bb.0: ; %entry
58 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
59 ; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
60 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
61 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
63 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
68 ; GFX10-CU: ; %bb.0: ; %entry
69 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
70 ; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
71 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
72 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
74 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
79 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
80 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
81 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
82 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
83 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
86 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000
98 ; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0
99 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
100 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
104 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
105 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
106 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
107 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
108 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
110 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
115 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
116 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
117 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
118 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
119 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
121 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
126 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
127 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
128 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
129 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
130 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
132 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
133 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
137 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
138 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
139 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
140 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
141 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
143 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
144 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
148 ; GFX11-WGP: ; %bb.0: ; %entry
149 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
150 ; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
151 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
152 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
154 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
155 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
159 ; GFX11-CU: ; %bb.0: ; %entry
160 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
161 ; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
162 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
163 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
165 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
166 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
170 ; GFX12-WGP: ; %bb.0: ; %entry
171 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
172 ; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
173 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
174 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
176 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
177 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
181 ; GFX12-CU: ; %bb.0: ; %entry
182 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
183 ; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
184 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
185 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
187 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
188 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
199 ; GFX6: ; %bb.0: ; %entry
201 ; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
202 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
203 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
206 ; GFX6-NEXT: s_mov_b32 s12, 0x100f000
218 ; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0
219 ; GFX6-NEXT: s_waitcnt vmcnt(0)
220 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
224 ; GFX7: ; %bb.0: ; %entry
225 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
226 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2
227 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
230 ; GFX7-NEXT: flat_load_dword v2, v[0:1]
233 ; GFX7-NEXT: s_waitcnt vmcnt(0)
234 ; GFX7-NEXT: flat_store_dword v[0:1], v2
238 ; GFX10-WGP: ; %bb.0: ; %entry
239 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
240 ; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
241 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
242 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
244 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
249 ; GFX10-CU: ; %bb.0: ; %entry
250 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
251 ; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
252 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
253 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
255 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
260 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
261 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
262 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
263 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
264 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
267 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000
279 ; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0
280 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
281 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
285 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
286 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
287 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
288 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
289 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
291 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
296 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
297 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
298 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
299 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
300 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
302 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
307 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
308 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
309 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
310 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
311 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
313 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
314 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
318 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
319 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
320 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
321 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
322 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
324 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
325 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
329 ; GFX11-WGP: ; %bb.0: ; %entry
330 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
331 ; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
332 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
333 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
335 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
336 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
340 ; GFX11-CU: ; %bb.0: ; %entry
341 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
342 ; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
343 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
344 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
346 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
347 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
351 ; GFX12-WGP: ; %bb.0: ; %entry
352 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
353 ; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
354 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
355 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
357 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
358 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
362 ; GFX12-CU: ; %bb.0: ; %entry
363 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
364 ; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
365 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
366 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
368 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
369 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
380 ; GFX6: ; %bb.0: ; %entry
382 ; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
383 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
384 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
387 ; GFX6-NEXT: s_mov_b32 s12, 0x100f000
399 ; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0
400 ; GFX6-NEXT: s_waitcnt vmcnt(0)
401 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
405 ; GFX7: ; %bb.0: ; %entry
406 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
407 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2
408 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
411 ; GFX7-NEXT: flat_load_dword v2, v[0:1]
414 ; GFX7-NEXT: s_waitcnt vmcnt(0)
415 ; GFX7-NEXT: flat_store_dword v[0:1], v2
419 ; GFX10-WGP: ; %bb.0: ; %entry
420 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
421 ; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
422 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
423 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
425 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
430 ; GFX10-CU: ; %bb.0: ; %entry
431 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
432 ; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
433 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
434 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
436 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
441 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
442 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
443 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
444 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
445 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
448 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000
460 ; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0
461 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
462 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
466 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
467 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
468 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
469 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
470 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
472 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
477 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
478 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
479 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
480 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
481 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
483 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
488 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
489 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
490 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
491 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
492 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
494 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
495 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
499 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
500 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
501 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
502 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
503 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
505 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
506 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
510 ; GFX11-WGP: ; %bb.0: ; %entry
511 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
512 ; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
513 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
514 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
516 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
517 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
521 ; GFX11-CU: ; %bb.0: ; %entry
522 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
523 ; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
524 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
525 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
527 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
528 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
532 ; GFX12-WGP: ; %bb.0: ; %entry
533 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
534 ; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
535 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
536 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
538 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
539 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
543 ; GFX12-CU: ; %bb.0: ; %entry
544 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
545 ; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
546 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
547 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
549 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
550 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
561 ; GFX6: ; %bb.0: ; %entry
563 ; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
564 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
565 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
568 ; GFX6-NEXT: s_mov_b32 s12, 0x100f000
580 ; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0
581 ; GFX6-NEXT: s_waitcnt vmcnt(0)
582 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
586 ; GFX7: ; %bb.0: ; %entry
587 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
588 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2
589 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
592 ; GFX7-NEXT: flat_load_dword v2, v[0:1]
595 ; GFX7-NEXT: s_waitcnt vmcnt(0)
596 ; GFX7-NEXT: flat_store_dword v[0:1], v2
600 ; GFX10-WGP: ; %bb.0: ; %entry
601 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
602 ; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
603 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
604 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
606 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
611 ; GFX10-CU: ; %bb.0: ; %entry
612 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
613 ; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
614 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
615 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
617 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
622 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
623 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
624 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
625 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
626 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
629 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000
641 ; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0
642 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
643 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
647 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
648 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
649 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
650 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
651 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
653 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
658 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
659 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
660 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
661 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
662 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
664 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
669 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
670 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
671 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
672 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
673 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
675 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
676 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
680 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
681 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
682 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
683 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
684 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
686 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
687 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
691 ; GFX11-WGP: ; %bb.0: ; %entry
692 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
693 ; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
694 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
695 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
697 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
698 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
702 ; GFX11-CU: ; %bb.0: ; %entry
703 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
704 ; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
705 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
706 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
708 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
709 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
713 ; GFX12-WGP: ; %bb.0: ; %entry
714 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
715 ; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
716 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
717 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
719 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
720 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
724 ; GFX12-CU: ; %bb.0: ; %entry
725 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
726 ; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
727 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
728 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
730 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
731 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
742 ; GFX6: ; %bb.0: ; %entry
744 ; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0
745 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
746 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
749 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
756 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
760 ; GFX7: ; %bb.0: ; %entry
761 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
762 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2
763 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
767 ; GFX7-NEXT: flat_store_dword v[0:1], v2
771 ; GFX10-WGP: ; %bb.0: ; %entry
772 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
773 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
774 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
775 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
781 ; GFX10-CU: ; %bb.0: ; %entry
782 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
783 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
784 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
785 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
791 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
792 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
793 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0
794 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
795 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
798 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
805 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
809 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
810 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
811 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
812 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
813 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
819 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
820 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
821 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
822 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
823 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
829 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
830 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
831 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
832 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
833 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
835 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
839 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
840 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
841 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
842 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
843 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
845 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
849 ; GFX11-WGP: ; %bb.0: ; %entry
850 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
851 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
852 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
853 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
855 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
859 ; GFX11-CU: ; %bb.0: ; %entry
860 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
861 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
862 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
863 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
865 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
869 ; GFX12-WGP: ; %bb.0: ; %entry
870 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
871 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
872 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
873 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
875 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
879 ; GFX12-CU: ; %bb.0: ; %entry
880 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
881 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
882 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
883 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
885 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
895 ; GFX6: ; %bb.0: ; %entry
897 ; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0
898 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
899 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
902 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
909 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
913 ; GFX7: ; %bb.0: ; %entry
914 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
915 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2
916 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
920 ; GFX7-NEXT: flat_store_dword v[0:1], v2
924 ; GFX10-WGP: ; %bb.0: ; %entry
925 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
926 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
927 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
928 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
934 ; GFX10-CU: ; %bb.0: ; %entry
935 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
936 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
937 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
938 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
944 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
945 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
946 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0
947 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
948 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
951 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
958 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
962 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
963 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
964 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
965 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
966 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
972 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
973 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
974 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
975 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
976 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
982 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
983 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
984 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
985 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
986 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
988 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
992 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
993 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
994 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
995 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
996 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
998 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
1002 ; GFX11-WGP: ; %bb.0: ; %entry
1003 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
1004 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
1005 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
1006 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
1008 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
1012 ; GFX11-CU: ; %bb.0: ; %entry
1013 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
1014 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
1015 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
1016 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
1018 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
1022 ; GFX12-WGP: ; %bb.0: ; %entry
1023 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
1024 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
1025 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
1026 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
1028 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
1032 ; GFX12-CU: ; %bb.0: ; %entry
1033 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
1034 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
1035 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
1036 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
1038 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
1048 ; GFX6: ; %bb.0: ; %entry
1050 ; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0
1051 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
1052 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1055 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
1062 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
1066 ; GFX7: ; %bb.0: ; %entry
1067 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
1068 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2
1069 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1073 ; GFX7-NEXT: flat_store_dword v[0:1], v2
1077 ; GFX10-WGP: ; %bb.0: ; %entry
1078 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
1079 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
1080 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
1081 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
1087 ; GFX10-CU: ; %bb.0: ; %entry
1088 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
1089 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
1090 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
1091 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
1097 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
1098 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
1099 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0
1100 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
1101 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
1104 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
1111 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
1115 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
1116 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
1117 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
1118 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1119 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1125 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
1126 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
1127 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
1128 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1129 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1135 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
1136 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
1137 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
1138 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1139 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1141 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
1145 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
1146 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
1147 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
1148 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1149 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1151 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
1155 ; GFX11-WGP: ; %bb.0: ; %entry
1156 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
1157 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
1158 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
1159 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
1161 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
1165 ; GFX11-CU: ; %bb.0: ; %entry
1166 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
1167 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
1168 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
1169 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
1171 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
1175 ; GFX12-WGP: ; %bb.0: ; %entry
1176 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
1177 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
1178 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
1179 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
1181 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
1185 ; GFX12-CU: ; %bb.0: ; %entry
1186 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
1187 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
1188 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
1189 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
1191 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
1201 ; GFX6: ; %bb.0: ; %entry
1203 ; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0
1204 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
1205 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1208 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
1215 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
1219 ; GFX7: ; %bb.0: ; %entry
1220 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
1221 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2
1222 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1226 ; GFX7-NEXT: flat_store_dword v[0:1], v2
1230 ; GFX10-WGP: ; %bb.0: ; %entry
1231 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
1232 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
1233 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
1234 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
1240 ; GFX10-CU: ; %bb.0: ; %entry
1241 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
1242 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
1243 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
1244 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
1250 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
1251 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
1252 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0
1253 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
1254 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
1257 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
1264 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
1268 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
1269 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
1270 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
1271 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1272 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1278 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
1279 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
1280 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
1281 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1282 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1288 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
1289 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
1290 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
1291 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1292 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1294 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
1298 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
1299 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
1300 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
1301 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1302 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1304 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
1308 ; GFX11-WGP: ; %bb.0: ; %entry
1309 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
1310 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
1311 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
1312 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
1314 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
1318 ; GFX11-CU: ; %bb.0: ; %entry
1319 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
1320 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
1321 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
1322 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
1324 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
1328 ; GFX12-WGP: ; %bb.0: ; %entry
1329 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
1330 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
1331 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
1332 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
1334 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
1338 ; GFX12-CU: ; %bb.0: ; %entry
1339 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
1340 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
1341 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
1342 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
1344 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
1354 ; GFX6: ; %bb.0: ; %entry
1355 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1356 ; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2
1357 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1360 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
1367 ; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
1371 ; GFX7: ; %bb.0: ; %entry
1372 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
1373 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2
1374 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1378 ; GFX7-NEXT: flat_atomic_swap v[0:1], v2
1382 ; GFX10-WGP: ; %bb.0: ; %entry
1383 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
1384 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1385 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
1386 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
1392 ; GFX10-CU: ; %bb.0: ; %entry
1393 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
1394 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1395 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
1396 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
1402 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
1403 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1404 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2
1405 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
1408 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
1415 ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
1419 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
1420 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1421 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1422 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
1423 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1429 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
1430 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1431 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1432 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
1433 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1439 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
1440 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1441 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1442 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
1443 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1445 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
1449 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
1450 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1451 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1452 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
1453 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1455 ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
1459 ; GFX11-WGP: ; %bb.0: ; %entry
1460 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
1461 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1462 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
1463 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
1465 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
1469 ; GFX11-CU: ; %bb.0: ; %entry
1470 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
1471 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1472 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
1473 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
1475 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
1479 ; GFX12-WGP: ; %bb.0: ; %entry
1480 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
1481 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1482 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
1483 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
1485 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
1489 ; GFX12-CU: ; %bb.0: ; %entry
1490 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
1491 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1492 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
1493 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
1495 ; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
1505 ; GFX6: ; %bb.0: ; %entry
1506 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1507 ; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2
1508 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1511 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
1518 ; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
1522 ; GFX7: ; %bb.0: ; %entry
1523 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
1524 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2
1525 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1529 ; GFX7-NEXT: flat_atomic_swap v[0:1], v2
1533 ; GFX10-WGP: ; %bb.0: ; %entry
1534 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
1535 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1536 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
1537 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
1543 ; GFX10-CU: ; %bb.0: ; %entry
1544 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
1545 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1546 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
1547 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
1553 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
1554 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1555 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2
1556 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
1559 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
1566 ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
1570 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
1571 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1572 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1573 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
1574 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1580 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
1581 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1582 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1583 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
1584 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1590 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
1591 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1592 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1593 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
1594 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1596 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
1600 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
1601 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1602 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1603 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
1604 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1606 ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
1610 ; GFX11-WGP: ; %bb.0: ; %entry
1611 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
1612 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1613 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
1614 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
1616 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
1620 ; GFX11-CU: ; %bb.0: ; %entry
1621 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
1622 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1623 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
1624 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
1626 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
1630 ; GFX12-WGP: ; %bb.0: ; %entry
1631 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
1632 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1633 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
1634 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
1636 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
1640 ; GFX12-CU: ; %bb.0: ; %entry
1641 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
1642 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1643 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
1644 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
1646 ; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
1656 ; GFX6: ; %bb.0: ; %entry
1657 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1658 ; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2
1659 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1662 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
1669 ; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
1673 ; GFX7: ; %bb.0: ; %entry
1674 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
1675 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2
1676 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1680 ; GFX7-NEXT: flat_atomic_swap v[0:1], v2
1684 ; GFX10-WGP: ; %bb.0: ; %entry
1685 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
1686 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1687 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
1688 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
1694 ; GFX10-CU: ; %bb.0: ; %entry
1695 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
1696 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1697 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
1698 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
1704 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
1705 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1706 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2
1707 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
1710 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
1717 ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
1721 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
1722 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1723 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1724 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
1725 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1731 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
1732 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1733 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1734 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
1735 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1741 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
1742 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1743 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1744 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
1745 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1747 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
1751 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
1752 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1753 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1754 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
1755 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1757 ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
1761 ; GFX11-WGP: ; %bb.0: ; %entry
1762 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
1763 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1764 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
1765 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
1767 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
1771 ; GFX11-CU: ; %bb.0: ; %entry
1772 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
1773 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1774 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
1775 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
1777 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
1781 ; GFX12-WGP: ; %bb.0: ; %entry
1782 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
1783 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1784 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
1785 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
1787 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
1791 ; GFX12-CU: ; %bb.0: ; %entry
1792 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
1793 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1794 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
1795 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
1797 ; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
1807 ; GFX6: ; %bb.0: ; %entry
1808 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1809 ; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2
1810 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1813 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
1820 ; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
1824 ; GFX7: ; %bb.0: ; %entry
1825 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
1826 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2
1827 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1831 ; GFX7-NEXT: flat_atomic_swap v[0:1], v2
1835 ; GFX10-WGP: ; %bb.0: ; %entry
1836 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
1837 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1838 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
1839 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
1845 ; GFX10-CU: ; %bb.0: ; %entry
1846 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
1847 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1848 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
1849 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
1855 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
1856 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1857 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2
1858 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
1861 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
1868 ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
1872 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
1873 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1874 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1875 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
1876 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1882 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
1883 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1884 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1885 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
1886 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1892 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
1893 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1894 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1895 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
1896 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1898 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
1902 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
1903 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
1904 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1905 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
1906 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
1908 ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
1912 ; GFX11-WGP: ; %bb.0: ; %entry
1913 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
1914 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1915 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
1916 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
1918 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
1922 ; GFX11-CU: ; %bb.0: ; %entry
1923 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
1924 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1925 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
1926 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
1928 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
1932 ; GFX12-WGP: ; %bb.0: ; %entry
1933 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
1934 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1935 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
1936 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
1938 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
1942 ; GFX12-CU: ; %bb.0: ; %entry
1943 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
1944 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1945 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
1946 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
1948 ; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
1958 ; GFX6: ; %bb.0: ; %entry
1959 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1960 ; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2
1961 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1964 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
1971 ; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
1975 ; GFX7: ; %bb.0: ; %entry
1976 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
1977 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2
1978 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1982 ; GFX7-NEXT: flat_atomic_swap v[0:1], v2
1986 ; GFX10-WGP: ; %bb.0: ; %entry
1987 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
1988 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1989 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
1990 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
1996 ; GFX10-CU: ; %bb.0: ; %entry
1997 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
1998 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
1999 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
2000 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
2006 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
2007 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2008 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2
2009 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
2012 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
2019 ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
2023 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
2024 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2025 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2026 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
2027 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2033 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
2034 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2035 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2036 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
2037 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2043 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
2044 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2045 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2046 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
2047 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2049 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
2053 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
2054 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2055 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2056 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
2057 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2059 ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
2063 ; GFX11-WGP: ; %bb.0: ; %entry
2064 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
2065 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2066 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
2067 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
2069 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
2073 ; GFX11-CU: ; %bb.0: ; %entry
2074 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
2075 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2076 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
2077 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
2079 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
2083 ; GFX12-WGP: ; %bb.0: ; %entry
2084 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
2085 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2086 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
2087 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
2089 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
2093 ; GFX12-CU: ; %bb.0: ; %entry
2094 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
2095 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2096 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
2097 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
2099 ; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
2109 ; GFX6: ; %bb.0: ; %entry
2110 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2111 ; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2
2112 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2115 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
2122 ; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
2123 ; GFX6-NEXT: s_waitcnt vmcnt(0)
2124 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
2128 ; GFX7: ; %bb.0: ; %entry
2129 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2130 ; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
2131 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
2135 ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
2138 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2139 ; GFX7-NEXT: flat_store_dword v[0:1], v2
2143 ; GFX10-WGP: ; %bb.0: ; %entry
2144 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
2145 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2146 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
2147 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
2150 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
2155 ; GFX10-CU: ; %bb.0: ; %entry
2156 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
2157 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2158 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
2159 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
2162 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
2167 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
2168 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2169 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2
2170 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
2173 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
2180 ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
2181 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
2182 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
2186 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
2187 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2188 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2189 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
2190 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2193 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
2198 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
2199 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2200 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2201 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
2202 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2205 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
2210 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
2211 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2212 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2213 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
2214 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2216 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0
2217 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
2218 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
2222 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
2223 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2224 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2225 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
2226 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2228 ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0
2229 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
2230 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
2234 ; GFX11-WGP: ; %bb.0: ; %entry
2235 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
2236 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2237 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
2238 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
2240 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
2241 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
2242 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
2246 ; GFX11-CU: ; %bb.0: ; %entry
2247 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
2248 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2249 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
2250 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
2252 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
2253 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
2254 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
2258 ; GFX12-WGP: ; %bb.0: ; %entry
2259 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
2260 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2261 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
2262 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
2264 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
2265 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
2266 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
2270 ; GFX12-CU: ; %bb.0: ; %entry
2271 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
2272 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2273 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
2274 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
2276 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
2277 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
2278 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
2289 ; GFX6: ; %bb.0: ; %entry
2290 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2291 ; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2
2292 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2295 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
2302 ; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
2303 ; GFX6-NEXT: s_waitcnt vmcnt(0)
2304 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
2308 ; GFX7: ; %bb.0: ; %entry
2309 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2310 ; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
2311 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
2315 ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
2318 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2319 ; GFX7-NEXT: flat_store_dword v[0:1], v2
2323 ; GFX10-WGP: ; %bb.0: ; %entry
2324 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
2325 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2326 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
2327 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
2330 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
2335 ; GFX10-CU: ; %bb.0: ; %entry
2336 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
2337 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2338 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
2339 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
2342 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
2347 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
2348 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2349 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2
2350 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
2353 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
2360 ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
2361 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
2362 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
2366 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
2367 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2368 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2369 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
2370 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2373 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
2378 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
2379 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2380 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2381 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
2382 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2385 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
2390 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
2391 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2392 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2393 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
2394 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2396 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0
2397 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
2398 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
2402 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
2403 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2404 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2405 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
2406 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2408 ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0
2409 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
2410 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
2414 ; GFX11-WGP: ; %bb.0: ; %entry
2415 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
2416 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2417 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
2418 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
2420 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
2421 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
2422 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
2426 ; GFX11-CU: ; %bb.0: ; %entry
2427 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
2428 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2429 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
2430 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
2432 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
2433 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
2434 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
2438 ; GFX12-WGP: ; %bb.0: ; %entry
2439 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
2440 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2441 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
2442 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
2444 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
2445 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
2446 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
2450 ; GFX12-CU: ; %bb.0: ; %entry
2451 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
2452 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2453 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
2454 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
2456 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
2457 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
2458 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
2469 ; GFX6: ; %bb.0: ; %entry
2470 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2471 ; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2
2472 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2475 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
2482 ; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
2483 ; GFX6-NEXT: s_waitcnt vmcnt(0)
2484 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
2488 ; GFX7: ; %bb.0: ; %entry
2489 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2490 ; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
2491 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
2495 ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
2498 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2499 ; GFX7-NEXT: flat_store_dword v[0:1], v2
2503 ; GFX10-WGP: ; %bb.0: ; %entry
2504 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
2505 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2506 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
2507 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
2510 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
2515 ; GFX10-CU: ; %bb.0: ; %entry
2516 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
2517 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2518 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
2519 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
2522 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
2527 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
2528 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2529 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2
2530 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
2533 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
2540 ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
2541 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
2542 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
2546 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
2547 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2548 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2549 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
2550 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2553 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
2558 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
2559 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2560 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2561 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
2562 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2565 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
2570 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
2571 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2572 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2573 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
2574 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2576 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0
2577 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
2578 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
2582 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
2583 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2584 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2585 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
2586 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2588 ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0
2589 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
2590 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
2594 ; GFX11-WGP: ; %bb.0: ; %entry
2595 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
2596 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2597 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
2598 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
2600 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
2601 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
2602 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
2606 ; GFX11-CU: ; %bb.0: ; %entry
2607 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
2608 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2609 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
2610 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
2612 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
2613 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
2614 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
2618 ; GFX12-WGP: ; %bb.0: ; %entry
2619 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
2620 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2621 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
2622 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
2624 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
2625 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
2626 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
2630 ; GFX12-CU: ; %bb.0: ; %entry
2631 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
2632 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2633 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
2634 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
2636 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
2637 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
2638 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
2649 ; GFX6: ; %bb.0: ; %entry
2651 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
2652 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
2653 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
2654 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2657 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
2667 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
2671 ; GFX7: ; %bb.0: ; %entry
2673 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
2674 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
2675 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
2677 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
2692 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
2696 ; GFX10-WGP: ; %bb.0: ; %entry
2697 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
2698 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2699 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
2700 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
2701 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
2710 ; GFX10-CU: ; %bb.0: ; %entry
2711 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
2712 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2713 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
2714 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
2715 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
2724 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
2726 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
2727 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
2728 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
2729 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
2732 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
2742 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
2746 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
2747 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2748 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2749 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
2750 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
2751 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2760 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
2761 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2762 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2763 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
2764 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
2765 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2774 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
2775 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2776 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2777 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
2778 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
2779 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2784 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
2788 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
2789 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2790 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2791 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
2792 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
2793 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2798 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
2802 ; GFX11-WGP: ; %bb.0: ; %entry
2803 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
2804 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2805 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
2806 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
2807 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
2812 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
2816 ; GFX11-CU: ; %bb.0: ; %entry
2817 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
2818 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2819 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
2820 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
2821 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
2826 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
2830 ; GFX12-WGP: ; %bb.0: ; %entry
2831 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
2832 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2833 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
2834 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
2835 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
2840 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
2844 ; GFX12-CU: ; %bb.0: ; %entry
2845 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
2846 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
2847 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
2848 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
2849 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
2854 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
2865 ; GFX6: ; %bb.0: ; %entry
2867 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
2868 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
2869 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
2870 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2873 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
2883 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
2887 ; GFX7: ; %bb.0: ; %entry
2889 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
2890 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
2891 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
2893 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
2908 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
2912 ; GFX10-WGP: ; %bb.0: ; %entry
2913 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
2914 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2915 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
2916 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
2917 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
2926 ; GFX10-CU: ; %bb.0: ; %entry
2927 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
2928 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2929 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
2930 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
2931 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
2940 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
2942 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
2943 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
2944 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
2945 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
2948 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
2958 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
2962 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
2963 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2964 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2965 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
2966 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
2967 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2976 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
2977 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2978 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
2979 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
2980 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
2981 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
2990 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
2991 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
2992 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2993 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
2994 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
2995 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3000 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3004 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
3005 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
3006 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3007 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
3008 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
3009 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3014 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3018 ; GFX11-WGP: ; %bb.0: ; %entry
3019 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
3020 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3021 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
3022 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
3023 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
3028 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3032 ; GFX11-CU: ; %bb.0: ; %entry
3033 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
3034 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3035 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
3036 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
3037 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
3042 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3046 ; GFX12-WGP: ; %bb.0: ; %entry
3047 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
3048 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3049 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
3050 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
3051 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
3056 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3060 ; GFX12-CU: ; %bb.0: ; %entry
3061 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
3062 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3063 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
3064 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
3065 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
3070 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3081 ; GFX6: ; %bb.0: ; %entry
3083 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
3084 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
3085 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
3086 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
3089 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
3099 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
3103 ; GFX7: ; %bb.0: ; %entry
3105 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
3106 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
3107 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
3109 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
3124 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
3128 ; GFX10-WGP: ; %bb.0: ; %entry
3129 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
3130 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
3131 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
3132 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
3133 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
3142 ; GFX10-CU: ; %bb.0: ; %entry
3143 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
3144 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
3145 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
3146 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
3147 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
3156 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
3158 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
3159 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
3160 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
3161 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
3164 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
3174 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
3178 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
3179 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
3180 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
3181 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
3182 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
3183 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3192 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
3193 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
3194 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
3195 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
3196 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
3197 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3206 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
3207 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
3208 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3209 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
3210 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
3211 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3216 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3220 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
3221 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
3222 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3223 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
3224 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
3225 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3230 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3234 ; GFX11-WGP: ; %bb.0: ; %entry
3235 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
3236 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3237 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
3238 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
3239 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
3244 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3248 ; GFX11-CU: ; %bb.0: ; %entry
3249 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
3250 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3251 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
3252 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
3253 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
3258 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3262 ; GFX12-WGP: ; %bb.0: ; %entry
3263 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
3264 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3265 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
3266 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
3267 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
3272 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3276 ; GFX12-CU: ; %bb.0: ; %entry
3277 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
3278 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3279 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
3280 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
3281 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
3286 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3297 ; GFX6: ; %bb.0: ; %entry
3299 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
3300 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
3301 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
3302 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
3305 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
3315 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
3319 ; GFX7: ; %bb.0: ; %entry
3321 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
3322 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
3323 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
3325 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
3340 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
3344 ; GFX10-WGP: ; %bb.0: ; %entry
3345 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
3346 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
3347 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
3348 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
3349 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
3358 ; GFX10-CU: ; %bb.0: ; %entry
3359 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
3360 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
3361 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
3362 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
3363 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
3372 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
3374 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
3375 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
3376 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
3377 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
3380 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
3390 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
3394 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
3395 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
3396 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
3397 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
3398 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
3399 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3408 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
3409 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
3410 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
3411 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
3412 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
3413 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3422 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
3423 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
3424 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3425 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
3426 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
3427 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3432 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3436 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
3437 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
3438 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3439 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
3440 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
3441 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3446 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3450 ; GFX11-WGP: ; %bb.0: ; %entry
3451 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
3452 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3453 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
3454 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
3455 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
3460 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3464 ; GFX11-CU: ; %bb.0: ; %entry
3465 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
3466 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3467 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
3468 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
3469 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
3474 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3478 ; GFX12-WGP: ; %bb.0: ; %entry
3479 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
3480 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3481 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
3482 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
3483 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
3488 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3492 ; GFX12-CU: ; %bb.0: ; %entry
3493 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
3494 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3495 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
3496 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
3497 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
3502 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3513 ; GFX6: ; %bb.0: ; %entry
3515 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
3516 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
3517 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
3518 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
3521 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
3531 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
3535 ; GFX7: ; %bb.0: ; %entry
3537 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
3538 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
3539 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
3541 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
3556 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
3560 ; GFX10-WGP: ; %bb.0: ; %entry
3561 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
3562 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
3563 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
3564 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
3565 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
3574 ; GFX10-CU: ; %bb.0: ; %entry
3575 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
3576 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
3577 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
3578 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
3579 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
3588 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
3590 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
3591 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
3592 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
3593 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
3596 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
3606 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
3610 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
3611 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
3612 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
3613 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
3614 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
3615 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3624 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
3625 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
3626 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
3627 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
3628 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
3629 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3638 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
3639 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
3640 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3641 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
3642 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
3643 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3648 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3652 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
3653 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
3654 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3655 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
3656 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
3657 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3662 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3666 ; GFX11-WGP: ; %bb.0: ; %entry
3667 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
3668 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3669 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
3670 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
3671 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
3676 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3680 ; GFX11-CU: ; %bb.0: ; %entry
3681 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
3682 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3683 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
3684 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
3685 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
3690 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3694 ; GFX12-WGP: ; %bb.0: ; %entry
3695 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
3696 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3697 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
3698 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
3699 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
3704 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3708 ; GFX12-CU: ; %bb.0: ; %entry
3709 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
3710 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3711 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
3712 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
3713 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
3718 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3729 ; GFX6: ; %bb.0: ; %entry
3731 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
3732 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
3733 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
3734 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
3737 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
3747 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
3751 ; GFX7: ; %bb.0: ; %entry
3753 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
3754 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
3755 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
3757 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
3772 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
3776 ; GFX10-WGP: ; %bb.0: ; %entry
3777 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
3778 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
3779 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
3780 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
3781 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
3790 ; GFX10-CU: ; %bb.0: ; %entry
3791 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
3792 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
3793 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
3794 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
3795 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
3804 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
3806 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
3807 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
3808 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
3809 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
3812 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
3822 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
3826 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
3827 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
3828 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
3829 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
3830 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
3831 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3840 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
3841 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
3842 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
3843 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
3844 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
3845 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3854 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
3855 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
3856 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3857 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
3858 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
3859 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3864 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3868 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
3869 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
3870 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3871 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
3872 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
3873 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
3878 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3882 ; GFX11-WGP: ; %bb.0: ; %entry
3883 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
3884 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3885 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
3886 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
3887 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
3892 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3896 ; GFX11-CU: ; %bb.0: ; %entry
3897 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
3898 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3899 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
3900 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
3901 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
3906 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3910 ; GFX12-WGP: ; %bb.0: ; %entry
3911 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
3912 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3913 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
3914 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
3915 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
3920 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3924 ; GFX12-CU: ; %bb.0: ; %entry
3925 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
3926 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
3927 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
3928 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
3929 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
3934 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3945 ; GFX6: ; %bb.0: ; %entry
3947 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
3948 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
3949 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
3950 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
3953 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
3963 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
3967 ; GFX7: ; %bb.0: ; %entry
3969 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
3970 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
3971 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
3973 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
3988 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
3992 ; GFX10-WGP: ; %bb.0: ; %entry
3993 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
3994 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
3995 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
3996 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
3997 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
4006 ; GFX10-CU: ; %bb.0: ; %entry
4007 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
4008 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4009 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
4010 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
4011 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
4020 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
4022 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
4023 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
4024 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
4025 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
4028 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
4038 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
4042 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
4043 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4044 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4045 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
4046 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
4047 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4056 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
4057 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4058 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4059 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
4060 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
4061 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4070 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
4071 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4072 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
4073 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
4074 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
4075 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4080 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4084 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
4085 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4086 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
4087 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
4088 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
4089 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4094 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4098 ; GFX11-WGP: ; %bb.0: ; %entry
4099 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
4100 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4101 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
4102 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
4103 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
4108 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4112 ; GFX11-CU: ; %bb.0: ; %entry
4113 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
4114 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4115 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
4116 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
4117 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
4122 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4126 ; GFX12-WGP: ; %bb.0: ; %entry
4127 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
4128 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4129 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
4130 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
4131 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
4136 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4140 ; GFX12-CU: ; %bb.0: ; %entry
4141 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
4142 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4143 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
4144 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
4145 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
4150 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4161 ; GFX6: ; %bb.0: ; %entry
4163 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
4164 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
4165 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
4166 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
4169 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
4179 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
4183 ; GFX7: ; %bb.0: ; %entry
4185 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
4186 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
4187 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
4189 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
4204 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
4208 ; GFX10-WGP: ; %bb.0: ; %entry
4209 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
4210 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4211 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
4212 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
4213 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
4222 ; GFX10-CU: ; %bb.0: ; %entry
4223 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
4224 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4225 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
4226 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
4227 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
4236 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
4238 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
4239 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
4240 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
4241 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
4244 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
4254 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
4258 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
4259 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4260 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4261 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
4262 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
4263 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4272 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
4273 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4274 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4275 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
4276 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
4277 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4286 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
4287 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4288 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
4289 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
4290 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
4291 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4296 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4300 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
4301 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4302 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
4303 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
4304 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
4305 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4310 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4314 ; GFX11-WGP: ; %bb.0: ; %entry
4315 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
4316 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4317 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
4318 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
4319 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
4324 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4328 ; GFX11-CU: ; %bb.0: ; %entry
4329 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
4330 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4331 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
4332 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
4333 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
4338 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4342 ; GFX12-WGP: ; %bb.0: ; %entry
4343 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
4344 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4345 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
4346 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
4347 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
4352 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4356 ; GFX12-CU: ; %bb.0: ; %entry
4357 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
4358 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4359 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
4360 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
4361 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
4366 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4377 ; GFX6: ; %bb.0: ; %entry
4379 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
4380 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
4381 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
4382 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
4385 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
4395 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
4399 ; GFX7: ; %bb.0: ; %entry
4401 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
4402 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
4403 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
4405 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
4420 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
4424 ; GFX10-WGP: ; %bb.0: ; %entry
4425 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
4426 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4427 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
4428 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
4429 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
4438 ; GFX10-CU: ; %bb.0: ; %entry
4439 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
4440 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4441 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
4442 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
4443 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
4452 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
4454 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
4455 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
4456 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
4457 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
4460 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
4470 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
4474 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
4475 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4476 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4477 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
4478 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
4479 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4488 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
4489 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4490 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4491 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
4492 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
4493 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4502 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
4503 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4504 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
4505 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
4506 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
4507 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4512 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4516 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
4517 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4518 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
4519 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
4520 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
4521 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4526 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4530 ; GFX11-WGP: ; %bb.0: ; %entry
4531 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
4532 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4533 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
4534 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
4535 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
4540 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4544 ; GFX11-CU: ; %bb.0: ; %entry
4545 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
4546 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4547 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
4548 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
4549 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
4554 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4558 ; GFX12-WGP: ; %bb.0: ; %entry
4559 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
4560 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4561 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
4562 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
4563 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
4568 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4572 ; GFX12-CU: ; %bb.0: ; %entry
4573 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
4574 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4575 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
4576 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
4577 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
4582 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4593 ; GFX6: ; %bb.0: ; %entry
4595 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
4596 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
4597 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
4598 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
4601 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
4611 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
4615 ; GFX7: ; %bb.0: ; %entry
4617 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
4618 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
4619 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
4621 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
4636 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
4640 ; GFX10-WGP: ; %bb.0: ; %entry
4641 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
4642 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4643 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
4644 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
4645 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
4654 ; GFX10-CU: ; %bb.0: ; %entry
4655 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
4656 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4657 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
4658 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
4659 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
4668 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
4670 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
4671 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
4672 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
4673 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
4676 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
4686 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
4690 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
4691 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4692 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4693 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
4694 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
4695 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4704 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
4705 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4706 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4707 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
4708 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
4709 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4718 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
4719 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4720 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
4721 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
4722 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
4723 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4728 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4732 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
4733 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4734 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
4735 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
4736 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
4737 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4742 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4746 ; GFX11-WGP: ; %bb.0: ; %entry
4747 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
4748 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4749 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
4750 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
4751 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
4756 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4760 ; GFX11-CU: ; %bb.0: ; %entry
4761 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
4762 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4763 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
4764 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
4765 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
4770 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4774 ; GFX12-WGP: ; %bb.0: ; %entry
4775 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
4776 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4777 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
4778 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
4779 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
4784 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4788 ; GFX12-CU: ; %bb.0: ; %entry
4789 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
4790 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4791 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
4792 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
4793 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
4798 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4809 ; GFX6: ; %bb.0: ; %entry
4811 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
4812 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
4813 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
4814 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
4817 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
4827 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
4831 ; GFX7: ; %bb.0: ; %entry
4833 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
4834 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
4835 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
4837 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
4852 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
4856 ; GFX10-WGP: ; %bb.0: ; %entry
4857 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
4858 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4859 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
4860 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
4861 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
4870 ; GFX10-CU: ; %bb.0: ; %entry
4871 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
4872 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4873 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
4874 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
4875 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
4884 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
4886 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
4887 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
4888 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
4889 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
4892 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
4902 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
4906 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
4907 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4908 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4909 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
4910 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
4911 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4920 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
4921 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4922 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
4923 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
4924 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
4925 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4934 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
4935 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4936 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
4937 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
4938 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
4939 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4944 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4948 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
4949 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
4950 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
4951 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
4952 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
4953 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
4958 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4962 ; GFX11-WGP: ; %bb.0: ; %entry
4963 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
4964 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4965 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
4966 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
4967 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
4972 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4976 ; GFX11-CU: ; %bb.0: ; %entry
4977 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
4978 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4979 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
4980 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
4981 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
4986 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4990 ; GFX12-WGP: ; %bb.0: ; %entry
4991 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
4992 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
4993 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
4994 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
4995 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
5000 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5004 ; GFX12-CU: ; %bb.0: ; %entry
5005 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
5006 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
5007 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
5008 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
5009 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
5014 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5025 ; GFX6: ; %bb.0: ; %entry
5027 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
5028 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
5029 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
5030 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
5033 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
5043 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
5047 ; GFX7: ; %bb.0: ; %entry
5049 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
5050 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
5051 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
5053 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
5068 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
5072 ; GFX10-WGP: ; %bb.0: ; %entry
5073 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
5074 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5075 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
5076 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
5077 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
5086 ; GFX10-CU: ; %bb.0: ; %entry
5087 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
5088 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5089 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
5090 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
5091 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
5100 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
5102 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
5103 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
5104 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
5105 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
5108 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
5118 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
5122 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
5123 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
5124 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5125 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
5126 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
5127 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
5136 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
5137 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
5138 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5139 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
5140 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
5141 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
5150 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
5151 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
5152 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
5153 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
5154 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
5155 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
5160 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
5164 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
5165 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
5166 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
5167 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
5168 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
5169 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
5174 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
5178 ; GFX11-WGP: ; %bb.0: ; %entry
5179 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
5180 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
5181 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
5182 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
5183 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
5188 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5192 ; GFX11-CU: ; %bb.0: ; %entry
5193 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
5194 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
5195 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
5196 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
5197 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
5202 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5206 ; GFX12-WGP: ; %bb.0: ; %entry
5207 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
5208 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
5209 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
5210 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
5211 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
5216 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5220 ; GFX12-CU: ; %bb.0: ; %entry
5221 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
5222 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
5223 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
5224 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
5225 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
5230 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5241 ; GFX6: ; %bb.0: ; %entry
5243 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
5244 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
5245 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
5246 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
5249 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
5259 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
5263 ; GFX7: ; %bb.0: ; %entry
5265 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
5266 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
5267 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
5269 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
5284 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
5288 ; GFX10-WGP: ; %bb.0: ; %entry
5289 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
5290 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5291 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
5292 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
5293 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
5302 ; GFX10-CU: ; %bb.0: ; %entry
5303 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
5304 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5305 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
5306 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
5307 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
5316 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
5318 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
5319 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
5320 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
5321 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
5324 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
5334 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
5338 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
5339 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
5340 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5341 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
5342 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
5343 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
5352 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
5353 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
5354 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5355 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
5356 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
5357 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
5366 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
5367 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
5368 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
5369 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
5370 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
5371 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
5376 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
5380 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
5381 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
5382 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
5383 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
5384 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
5385 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
5390 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
5394 ; GFX11-WGP: ; %bb.0: ; %entry
5395 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
5396 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
5397 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
5398 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
5399 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
5404 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5408 ; GFX11-CU: ; %bb.0: ; %entry
5409 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
5410 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
5411 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
5412 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
5413 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
5418 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5422 ; GFX12-WGP: ; %bb.0: ; %entry
5423 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
5424 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
5425 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
5426 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
5427 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
5432 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5436 ; GFX12-CU: ; %bb.0: ; %entry
5437 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
5438 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
5439 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
5440 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
5441 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
5446 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5457 ; GFX6: ; %bb.0: ; %entry
5459 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
5460 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
5461 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
5462 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
5465 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
5475 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
5479 ; GFX7: ; %bb.0: ; %entry
5481 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
5482 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
5483 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
5485 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
5500 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
5504 ; GFX10-WGP: ; %bb.0: ; %entry
5505 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
5506 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5507 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
5508 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
5509 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
5518 ; GFX10-CU: ; %bb.0: ; %entry
5519 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
5520 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5521 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
5522 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
5523 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
5532 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
5534 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
5535 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
5536 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
5537 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
5540 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
5550 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
5554 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
5555 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
5556 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5557 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
5558 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
5559 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
5568 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
5569 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
5570 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5571 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
5572 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
5573 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
5582 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
5583 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
5584 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
5585 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
5586 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
5587 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
5592 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
5596 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
5597 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
5598 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
5599 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
5600 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
5601 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
5606 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
5610 ; GFX11-WGP: ; %bb.0: ; %entry
5611 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
5612 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
5613 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
5614 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
5615 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
5620 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5624 ; GFX11-CU: ; %bb.0: ; %entry
5625 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
5626 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
5627 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
5628 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
5629 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
5634 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5638 ; GFX12-WGP: ; %bb.0: ; %entry
5639 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
5640 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
5641 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
5642 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
5643 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
5648 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5652 ; GFX12-CU: ; %bb.0: ; %entry
5653 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
5654 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
5655 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
5656 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
5657 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
5662 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5673 ; GFX6: ; %bb.0: ; %entry
5675 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
5676 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
5677 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
5678 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
5681 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
5691 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
5695 ; GFX7: ; %bb.0: ; %entry
5697 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
5698 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
5699 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
5701 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
5716 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
5720 ; GFX10-WGP: ; %bb.0: ; %entry
5721 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
5722 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5723 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
5724 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
5725 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
5734 ; GFX10-CU: ; %bb.0: ; %entry
5735 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
5736 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5737 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
5738 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
5739 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
5748 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
5750 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
5751 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
5752 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
5753 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
5756 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
5766 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
5770 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
5771 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
5772 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5773 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
5774 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
5775 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
5784 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
5785 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
5786 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5787 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
5788 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
5789 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
5798 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
5799 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
5800 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
5801 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
5802 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
5803 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
5808 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
5812 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
5813 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
5814 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
5815 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
5816 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
5817 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
5822 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
5826 ; GFX11-WGP: ; %bb.0: ; %entry
5827 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
5828 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
5829 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
5830 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
5831 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
5836 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5840 ; GFX11-CU: ; %bb.0: ; %entry
5841 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
5842 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
5843 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
5844 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
5845 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
5850 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5854 ; GFX12-WGP: ; %bb.0: ; %entry
5855 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
5856 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
5857 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
5858 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
5859 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
5864 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5868 ; GFX12-CU: ; %bb.0: ; %entry
5869 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
5870 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
5871 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
5872 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
5873 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
5878 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5889 ; GFX6: ; %bb.0: ; %entry
5891 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
5892 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
5893 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
5894 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
5897 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
5907 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
5909 ; GFX6-NEXT: s_waitcnt vmcnt(0)
5910 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
5914 ; GFX7: ; %bb.0: ; %entry
5916 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
5917 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
5918 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
5920 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
5935 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
5938 ; GFX7-NEXT: s_waitcnt vmcnt(0)
5939 ; GFX7-NEXT: flat_store_dword v[0:1], v2
5943 ; GFX10-WGP: ; %bb.0: ; %entry
5944 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
5945 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5946 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
5947 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
5948 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
5954 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
5959 ; GFX10-CU: ; %bb.0: ; %entry
5960 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
5961 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
5962 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
5963 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
5964 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
5970 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
5975 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
5977 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
5978 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
5979 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
5980 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
5983 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
5993 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
5995 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
5996 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
6000 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
6001 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
6002 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
6003 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
6004 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
6005 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
6011 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
6016 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
6017 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
6018 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
6019 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
6020 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
6021 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
6027 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
6032 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
6033 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
6034 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
6035 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
6036 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
6037 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
6042 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
6043 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
6044 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
6048 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
6049 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
6050 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
6051 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
6052 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
6053 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
6058 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
6059 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
6060 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
6064 ; GFX11-WGP: ; %bb.0: ; %entry
6065 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
6066 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
6067 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
6068 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
6069 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
6074 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
6075 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
6076 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
6080 ; GFX11-CU: ; %bb.0: ; %entry
6081 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
6082 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
6083 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
6084 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
6085 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
6090 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
6091 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
6092 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
6096 ; GFX12-WGP: ; %bb.0: ; %entry
6097 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
6098 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
6099 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
6100 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
6101 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
6106 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
6107 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
6108 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
6112 ; GFX12-CU: ; %bb.0: ; %entry
6113 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
6114 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
6115 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
6116 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
6117 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
6122 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
6123 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
6124 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
6130 %val0 = extractvalue { i32, i1 } %val, 0
6137 ; GFX6: ; %bb.0: ; %entry
6139 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
6140 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
6141 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
6142 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
6145 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
6155 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
6157 ; GFX6-NEXT: s_waitcnt vmcnt(0)
6158 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
6162 ; GFX7: ; %bb.0: ; %entry
6164 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
6165 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
6166 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
6168 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
6183 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
6186 ; GFX7-NEXT: s_waitcnt vmcnt(0)
6187 ; GFX7-NEXT: flat_store_dword v[0:1], v2
6191 ; GFX10-WGP: ; %bb.0: ; %entry
6192 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
6193 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
6194 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
6195 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
6196 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
6202 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
6207 ; GFX10-CU: ; %bb.0: ; %entry
6208 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
6209 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
6210 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
6211 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
6212 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
6218 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
6223 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
6225 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
6226 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
6227 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
6228 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
6231 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
6241 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
6243 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
6244 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
6248 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
6249 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
6250 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
6251 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
6252 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
6253 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
6259 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
6264 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
6265 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
6266 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
6267 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
6268 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
6269 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
6275 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
6280 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
6281 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
6282 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
6283 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
6284 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
6285 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
6290 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
6291 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
6292 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
6296 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
6297 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
6298 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
6299 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
6300 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
6301 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
6306 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
6307 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
6308 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
6312 ; GFX11-WGP: ; %bb.0: ; %entry
6313 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
6314 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
6315 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
6316 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
6317 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
6322 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
6323 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
6324 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
6328 ; GFX11-CU: ; %bb.0: ; %entry
6329 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
6330 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
6331 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
6332 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
6333 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
6338 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
6339 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
6340 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
6344 ; GFX12-WGP: ; %bb.0: ; %entry
6345 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
6346 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
6347 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
6348 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
6349 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
6354 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
6355 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
6356 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
6360 ; GFX12-CU: ; %bb.0: ; %entry
6361 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
6362 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
6363 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
6364 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
6365 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
6370 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
6371 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
6372 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
6378 %val0 = extractvalue { i32, i1 } %val, 0
6385 ; GFX6: ; %bb.0: ; %entry
6387 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
6388 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
6389 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
6390 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
6393 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
6403 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
6405 ; GFX6-NEXT: s_waitcnt vmcnt(0)
6406 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
6410 ; GFX7: ; %bb.0: ; %entry
6412 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
6413 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
6414 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
6416 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
6431 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
6434 ; GFX7-NEXT: s_waitcnt vmcnt(0)
6435 ; GFX7-NEXT: flat_store_dword v[0:1], v2
6439 ; GFX10-WGP: ; %bb.0: ; %entry
6440 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
6441 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
6442 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
6443 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
6444 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
6450 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
6455 ; GFX10-CU: ; %bb.0: ; %entry
6456 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
6457 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
6458 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
6459 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
6460 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
6466 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
6471 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
6473 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
6474 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
6475 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
6476 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
6479 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
6489 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
6491 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
6492 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
6496 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
6497 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
6498 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
6499 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
6500 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
6501 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
6507 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
6512 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
6513 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
6514 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
6515 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
6516 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
6517 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
6523 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
6528 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
6529 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
6530 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
6531 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
6532 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
6533 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
6538 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
6539 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
6540 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
6544 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
6545 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
6546 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
6547 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
6548 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
6549 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
6554 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
6555 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
6556 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
6560 ; GFX11-WGP: ; %bb.0: ; %entry
6561 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
6562 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
6563 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
6564 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
6565 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
6570 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
6571 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
6572 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
6576 ; GFX11-CU: ; %bb.0: ; %entry
6577 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
6578 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
6579 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
6580 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
6581 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
6586 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
6587 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
6588 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
6592 ; GFX12-WGP: ; %bb.0: ; %entry
6593 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
6594 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
6595 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
6596 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
6597 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
6602 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
6603 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
6604 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
6608 ; GFX12-CU: ; %bb.0: ; %entry
6609 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
6610 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
6611 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
6612 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
6613 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
6618 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
6619 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
6620 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
6626 %val0 = extractvalue { i32, i1 } %val, 0
6633 ; GFX6: ; %bb.0: ; %entry
6635 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
6636 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
6637 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
6638 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
6641 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
6651 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
6653 ; GFX6-NEXT: s_waitcnt vmcnt(0)
6654 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
6658 ; GFX7: ; %bb.0: ; %entry
6660 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
6661 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
6662 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
6664 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
6679 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
6682 ; GFX7-NEXT: s_waitcnt vmcnt(0)
6683 ; GFX7-NEXT: flat_store_dword v[0:1], v2
6687 ; GFX10-WGP: ; %bb.0: ; %entry
6688 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
6689 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
6690 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
6691 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
6692 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
6698 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
6703 ; GFX10-CU: ; %bb.0: ; %entry
6704 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
6705 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
6706 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
6707 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
6708 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
6714 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
6719 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
6721 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
6722 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
6723 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
6724 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
6727 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
6737 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
6739 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
6740 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
6744 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
6745 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
6746 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
6747 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
6748 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
6749 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
6755 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
6760 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
6761 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
6762 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
6763 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
6764 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
6765 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
6771 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
6776 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
6777 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
6778 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
6779 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
6780 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
6781 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
6786 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
6787 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
6788 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
6792 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
6793 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
6794 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
6795 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
6796 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
6797 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
6802 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
6803 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
6804 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
6808 ; GFX11-WGP: ; %bb.0: ; %entry
6809 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
6810 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
6811 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
6812 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
6813 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
6818 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
6819 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
6820 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
6824 ; GFX11-CU: ; %bb.0: ; %entry
6825 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
6826 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
6827 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
6828 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
6829 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
6834 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
6835 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
6836 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
6840 ; GFX12-WGP: ; %bb.0: ; %entry
6841 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
6842 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
6843 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
6844 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
6845 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
6850 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
6851 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
6852 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
6856 ; GFX12-CU: ; %bb.0: ; %entry
6857 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
6858 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
6859 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
6860 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
6861 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
6866 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
6867 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
6868 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
6874 %val0 = extractvalue { i32, i1 } %val, 0
6881 ; GFX6: ; %bb.0: ; %entry
6883 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
6884 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
6885 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
6886 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
6889 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
6899 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
6901 ; GFX6-NEXT: s_waitcnt vmcnt(0)
6902 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
6906 ; GFX7: ; %bb.0: ; %entry
6908 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
6909 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
6910 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
6912 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
6927 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
6930 ; GFX7-NEXT: s_waitcnt vmcnt(0)
6931 ; GFX7-NEXT: flat_store_dword v[0:1], v2
6935 ; GFX10-WGP: ; %bb.0: ; %entry
6936 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
6937 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
6938 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
6939 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
6940 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
6946 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
6951 ; GFX10-CU: ; %bb.0: ; %entry
6952 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
6953 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
6954 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
6955 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
6956 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
6962 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
6967 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
6969 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
6970 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
6971 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
6972 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
6975 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
6985 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
6987 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
6988 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
6992 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
6993 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
6994 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
6995 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
6996 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
6997 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
7003 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
7008 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
7009 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
7010 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
7011 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
7012 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
7013 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
7019 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
7024 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
7025 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
7026 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
7027 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
7028 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
7029 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
7034 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
7035 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
7036 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
7040 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
7041 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
7042 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
7043 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
7044 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
7045 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
7050 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
7051 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
7052 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
7056 ; GFX11-WGP: ; %bb.0: ; %entry
7057 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
7058 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
7059 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
7060 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
7061 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
7066 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7067 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
7068 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
7072 ; GFX11-CU: ; %bb.0: ; %entry
7073 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
7074 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
7075 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
7076 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
7077 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
7082 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7083 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
7084 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
7088 ; GFX12-WGP: ; %bb.0: ; %entry
7089 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
7090 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
7091 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
7092 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
7093 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
7098 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
7099 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
7100 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
7104 ; GFX12-CU: ; %bb.0: ; %entry
7105 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
7106 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
7107 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
7108 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
7109 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
7114 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
7115 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
7116 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
7122 %val0 = extractvalue { i32, i1 } %val, 0
7129 ; GFX6: ; %bb.0: ; %entry
7131 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
7132 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
7133 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
7134 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
7137 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
7147 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
7149 ; GFX6-NEXT: s_waitcnt vmcnt(0)
7150 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
7154 ; GFX7: ; %bb.0: ; %entry
7156 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
7157 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
7158 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
7160 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
7175 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
7178 ; GFX7-NEXT: s_waitcnt vmcnt(0)
7179 ; GFX7-NEXT: flat_store_dword v[0:1], v2
7183 ; GFX10-WGP: ; %bb.0: ; %entry
7184 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
7185 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
7186 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
7187 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
7188 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
7194 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
7199 ; GFX10-CU: ; %bb.0: ; %entry
7200 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
7201 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
7202 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
7203 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
7204 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
7210 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
7215 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
7217 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
7218 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
7219 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
7220 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
7223 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
7233 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
7235 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
7236 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
7240 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
7241 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
7242 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
7243 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
7244 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
7245 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
7251 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
7256 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
7257 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
7258 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
7259 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
7260 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
7261 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
7267 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
7272 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
7273 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
7274 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
7275 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
7276 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
7277 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
7282 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
7283 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
7284 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
7288 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
7289 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
7290 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
7291 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
7292 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
7293 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
7298 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
7299 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
7300 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
7304 ; GFX11-WGP: ; %bb.0: ; %entry
7305 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
7306 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
7307 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
7308 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
7309 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
7314 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7315 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
7316 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
7320 ; GFX11-CU: ; %bb.0: ; %entry
7321 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
7322 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
7323 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
7324 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
7325 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
7330 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7331 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
7332 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
7336 ; GFX12-WGP: ; %bb.0: ; %entry
7337 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
7338 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
7339 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
7340 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
7341 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
7346 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
7347 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
7348 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
7352 ; GFX12-CU: ; %bb.0: ; %entry
7353 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
7354 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
7355 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
7356 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
7357 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
7362 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
7363 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
7364 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
7370 %val0 = extractvalue { i32, i1 } %val, 0
7377 ; GFX6: ; %bb.0: ; %entry
7379 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
7380 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
7381 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
7382 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
7385 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
7395 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
7397 ; GFX6-NEXT: s_waitcnt vmcnt(0)
7398 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
7402 ; GFX7: ; %bb.0: ; %entry
7404 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
7405 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
7406 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
7408 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
7423 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
7426 ; GFX7-NEXT: s_waitcnt vmcnt(0)
7427 ; GFX7-NEXT: flat_store_dword v[0:1], v2
7431 ; GFX10-WGP: ; %bb.0: ; %entry
7432 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
7433 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
7434 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
7435 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
7436 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
7442 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
7447 ; GFX10-CU: ; %bb.0: ; %entry
7448 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
7449 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
7450 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
7451 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
7452 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
7458 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
7463 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
7465 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
7466 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
7467 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
7468 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
7471 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
7481 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
7483 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
7484 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
7488 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
7489 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
7490 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
7491 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
7492 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
7493 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
7499 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
7504 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
7505 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
7506 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
7507 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
7508 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
7509 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
7515 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
7520 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
7521 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
7522 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
7523 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
7524 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
7525 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
7530 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
7531 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
7532 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
7536 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
7537 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
7538 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
7539 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
7540 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
7541 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
7546 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
7547 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
7548 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
7552 ; GFX11-WGP: ; %bb.0: ; %entry
7553 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
7554 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
7555 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
7556 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
7557 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
7562 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7563 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
7564 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
7568 ; GFX11-CU: ; %bb.0: ; %entry
7569 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
7570 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
7571 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
7572 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
7573 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
7578 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7579 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
7580 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
7584 ; GFX12-WGP: ; %bb.0: ; %entry
7585 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
7586 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
7587 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
7588 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
7589 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
7594 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
7595 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
7596 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
7600 ; GFX12-CU: ; %bb.0: ; %entry
7601 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
7602 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
7603 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
7604 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
7605 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
7610 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
7611 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
7612 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
7618 %val0 = extractvalue { i32, i1 } %val, 0
7625 ; GFX6: ; %bb.0: ; %entry
7627 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
7628 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
7629 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
7630 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
7633 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
7643 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
7645 ; GFX6-NEXT: s_waitcnt vmcnt(0)
7646 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
7650 ; GFX7: ; %bb.0: ; %entry
7652 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
7653 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
7654 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
7656 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
7671 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
7674 ; GFX7-NEXT: s_waitcnt vmcnt(0)
7675 ; GFX7-NEXT: flat_store_dword v[0:1], v2
7679 ; GFX10-WGP: ; %bb.0: ; %entry
7680 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
7681 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
7682 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
7683 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
7684 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
7690 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
7695 ; GFX10-CU: ; %bb.0: ; %entry
7696 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
7697 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
7698 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
7699 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
7700 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
7706 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
7711 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
7713 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
7714 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
7715 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
7716 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
7719 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
7729 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
7731 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
7732 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
7736 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
7737 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
7738 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
7739 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
7740 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
7741 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
7747 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
7752 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
7753 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
7754 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
7755 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
7756 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
7757 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
7763 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
7768 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
7769 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
7770 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
7771 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
7772 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
7773 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
7778 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
7779 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
7780 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
7784 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
7785 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
7786 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
7787 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
7788 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
7789 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
7794 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
7795 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
7796 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
7800 ; GFX11-WGP: ; %bb.0: ; %entry
7801 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
7802 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
7803 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
7804 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
7805 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
7810 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7811 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
7812 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
7816 ; GFX11-CU: ; %bb.0: ; %entry
7817 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
7818 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
7819 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
7820 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
7821 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
7826 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7827 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
7828 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
7832 ; GFX12-WGP: ; %bb.0: ; %entry
7833 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
7834 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
7835 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
7836 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
7837 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
7842 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
7843 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
7844 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
7848 ; GFX12-CU: ; %bb.0: ; %entry
7849 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
7850 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
7851 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
7852 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
7853 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
7858 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
7859 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
7860 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
7866 %val0 = extractvalue { i32, i1 } %val, 0
7873 ; GFX6: ; %bb.0: ; %entry
7875 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
7876 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
7877 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
7878 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
7881 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
7891 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
7893 ; GFX6-NEXT: s_waitcnt vmcnt(0)
7894 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
7898 ; GFX7: ; %bb.0: ; %entry
7900 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
7901 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
7902 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
7904 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
7919 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
7922 ; GFX7-NEXT: s_waitcnt vmcnt(0)
7923 ; GFX7-NEXT: flat_store_dword v[0:1], v2
7927 ; GFX10-WGP: ; %bb.0: ; %entry
7928 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
7929 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
7930 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
7931 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
7932 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
7938 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
7943 ; GFX10-CU: ; %bb.0: ; %entry
7944 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
7945 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
7946 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
7947 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
7948 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
7954 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
7959 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
7961 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
7962 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
7963 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
7964 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
7967 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
7977 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
7979 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
7980 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
7984 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
7985 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
7986 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
7987 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
7988 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
7989 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
7995 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
8000 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
8001 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
8002 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
8003 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
8004 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
8005 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
8011 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
8016 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
8017 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
8018 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
8019 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
8020 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
8021 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
8026 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
8027 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
8028 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
8032 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
8033 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
8034 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
8035 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
8036 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
8037 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
8042 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
8043 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
8044 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
8048 ; GFX11-WGP: ; %bb.0: ; %entry
8049 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
8050 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
8051 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
8052 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
8053 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
8058 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8059 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
8060 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
8064 ; GFX11-CU: ; %bb.0: ; %entry
8065 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
8066 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
8067 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
8068 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
8069 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
8074 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8075 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
8076 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
8080 ; GFX12-WGP: ; %bb.0: ; %entry
8081 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
8082 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
8083 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
8084 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
8085 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
8090 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
8091 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
8092 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
8096 ; GFX12-CU: ; %bb.0: ; %entry
8097 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
8098 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
8099 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
8100 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
8101 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
8106 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
8107 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
8108 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
8114 %val0 = extractvalue { i32, i1 } %val, 0
8121 ; GFX6: ; %bb.0: ; %entry
8123 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
8124 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
8125 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
8126 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
8129 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
8139 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
8141 ; GFX6-NEXT: s_waitcnt vmcnt(0)
8142 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
8146 ; GFX7: ; %bb.0: ; %entry
8148 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
8149 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
8150 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
8152 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
8167 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
8170 ; GFX7-NEXT: s_waitcnt vmcnt(0)
8171 ; GFX7-NEXT: flat_store_dword v[0:1], v2
8175 ; GFX10-WGP: ; %bb.0: ; %entry
8176 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
8177 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
8178 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
8179 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
8180 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
8186 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
8191 ; GFX10-CU: ; %bb.0: ; %entry
8192 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
8193 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
8194 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
8195 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
8196 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
8202 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
8207 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
8209 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
8210 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
8211 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
8212 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
8215 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
8225 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
8227 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
8228 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
8232 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
8233 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
8234 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
8235 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
8236 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
8237 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
8243 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
8248 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
8249 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
8250 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
8251 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
8252 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
8253 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
8259 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
8264 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
8265 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
8266 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
8267 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
8268 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
8269 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
8274 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
8275 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
8276 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
8280 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
8281 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
8282 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
8283 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
8284 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
8285 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
8290 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
8291 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
8292 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
8296 ; GFX11-WGP: ; %bb.0: ; %entry
8297 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
8298 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
8299 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
8300 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
8301 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
8306 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8307 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
8308 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
8312 ; GFX11-CU: ; %bb.0: ; %entry
8313 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
8314 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
8315 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
8316 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
8317 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
8322 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8323 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
8324 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
8328 ; GFX12-WGP: ; %bb.0: ; %entry
8329 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
8330 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
8331 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
8332 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
8333 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
8338 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
8339 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
8340 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
8344 ; GFX12-CU: ; %bb.0: ; %entry
8345 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
8346 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
8347 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
8348 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
8349 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
8354 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
8355 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
8356 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
8362 %val0 = extractvalue { i32, i1 } %val, 0
8369 ; GFX6: ; %bb.0: ; %entry
8371 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
8372 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
8373 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
8374 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
8377 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
8387 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
8389 ; GFX6-NEXT: s_waitcnt vmcnt(0)
8390 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
8394 ; GFX7: ; %bb.0: ; %entry
8396 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
8397 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
8398 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
8400 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
8415 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
8418 ; GFX7-NEXT: s_waitcnt vmcnt(0)
8419 ; GFX7-NEXT: flat_store_dword v[0:1], v2
8423 ; GFX10-WGP: ; %bb.0: ; %entry
8424 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
8425 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
8426 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
8427 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
8428 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
8434 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
8439 ; GFX10-CU: ; %bb.0: ; %entry
8440 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
8441 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
8442 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
8443 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
8444 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
8450 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
8455 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
8457 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
8458 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
8459 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
8460 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
8463 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
8473 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
8475 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
8476 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
8480 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
8481 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
8482 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
8483 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
8484 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
8485 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
8491 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
8496 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
8497 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
8498 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
8499 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
8500 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
8501 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
8507 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
8512 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
8513 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
8514 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
8515 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
8516 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
8517 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
8522 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
8523 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
8524 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
8528 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
8529 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
8530 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
8531 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
8532 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
8533 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
8538 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
8539 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
8540 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
8544 ; GFX11-WGP: ; %bb.0: ; %entry
8545 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
8546 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
8547 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
8548 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
8549 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
8554 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8555 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
8556 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
8560 ; GFX11-CU: ; %bb.0: ; %entry
8561 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
8562 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
8563 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
8564 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
8565 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
8570 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8571 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
8572 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
8576 ; GFX12-WGP: ; %bb.0: ; %entry
8577 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
8578 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
8579 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
8580 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
8581 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
8586 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
8587 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
8588 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
8592 ; GFX12-CU: ; %bb.0: ; %entry
8593 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
8594 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
8595 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
8596 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
8597 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
8602 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
8603 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
8604 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
8610 %val0 = extractvalue { i32, i1 } %val, 0
8617 ; GFX6: ; %bb.0: ; %entry
8619 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
8620 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
8621 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
8622 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
8625 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
8635 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
8637 ; GFX6-NEXT: s_waitcnt vmcnt(0)
8638 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
8642 ; GFX7: ; %bb.0: ; %entry
8644 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
8645 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
8646 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
8648 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
8663 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
8666 ; GFX7-NEXT: s_waitcnt vmcnt(0)
8667 ; GFX7-NEXT: flat_store_dword v[0:1], v2
8671 ; GFX10-WGP: ; %bb.0: ; %entry
8672 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
8673 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
8674 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
8675 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
8676 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
8682 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
8687 ; GFX10-CU: ; %bb.0: ; %entry
8688 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
8689 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
8690 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
8691 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
8692 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
8698 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
8703 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
8705 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
8706 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
8707 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
8708 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
8711 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
8721 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
8723 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
8724 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
8728 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
8729 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
8730 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
8731 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
8732 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
8733 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
8739 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
8744 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
8745 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
8746 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
8747 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
8748 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
8749 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
8755 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
8760 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
8761 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
8762 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
8763 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
8764 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
8765 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
8770 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
8771 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
8772 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
8776 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
8777 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
8778 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
8779 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
8780 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
8781 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
8786 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
8787 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
8788 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
8792 ; GFX11-WGP: ; %bb.0: ; %entry
8793 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
8794 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
8795 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
8796 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
8797 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
8802 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8803 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
8804 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
8808 ; GFX11-CU: ; %bb.0: ; %entry
8809 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
8810 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
8811 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
8812 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
8813 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
8818 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8819 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
8820 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
8824 ; GFX12-WGP: ; %bb.0: ; %entry
8825 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
8826 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
8827 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
8828 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
8829 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
8834 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
8835 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
8836 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
8840 ; GFX12-CU: ; %bb.0: ; %entry
8841 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
8842 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
8843 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
8844 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
8845 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
8850 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
8851 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
8852 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
8858 %val0 = extractvalue { i32, i1 } %val, 0
8865 ; GFX6: ; %bb.0: ; %entry
8867 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
8868 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
8869 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
8870 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
8873 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
8883 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
8885 ; GFX6-NEXT: s_waitcnt vmcnt(0)
8886 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
8890 ; GFX7: ; %bb.0: ; %entry
8892 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
8893 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
8894 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
8896 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
8911 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
8914 ; GFX7-NEXT: s_waitcnt vmcnt(0)
8915 ; GFX7-NEXT: flat_store_dword v[0:1], v2
8919 ; GFX10-WGP: ; %bb.0: ; %entry
8920 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
8921 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
8922 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
8923 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
8924 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
8930 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
8935 ; GFX10-CU: ; %bb.0: ; %entry
8936 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
8937 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
8938 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
8939 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
8940 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
8946 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
8951 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
8953 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
8954 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
8955 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
8956 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
8959 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
8969 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
8971 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
8972 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
8976 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
8977 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
8978 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
8979 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
8980 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
8981 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
8987 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
8992 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
8993 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
8994 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
8995 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
8996 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
8997 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9003 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
9008 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
9009 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9010 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
9011 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
9012 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
9013 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9018 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
9019 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
9020 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
9024 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
9025 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9026 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
9027 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
9028 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
9029 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9034 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
9035 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
9036 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
9040 ; GFX11-WGP: ; %bb.0: ; %entry
9041 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
9042 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
9043 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
9044 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
9045 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
9050 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9051 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
9052 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
9056 ; GFX11-CU: ; %bb.0: ; %entry
9057 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
9058 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
9059 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
9060 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
9061 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
9066 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9067 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
9068 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
9072 ; GFX12-WGP: ; %bb.0: ; %entry
9073 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
9074 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
9075 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
9076 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
9077 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
9082 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
9083 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
9084 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
9088 ; GFX12-CU: ; %bb.0: ; %entry
9089 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
9090 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
9091 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
9092 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
9093 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
9098 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
9099 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
9100 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
9106 %val0 = extractvalue { i32, i1 } %val, 0
9113 ; GFX6: ; %bb.0: ; %entry
9115 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
9116 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
9117 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
9118 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
9121 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
9131 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
9133 ; GFX6-NEXT: s_waitcnt vmcnt(0)
9134 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
9138 ; GFX7: ; %bb.0: ; %entry
9140 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
9141 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
9142 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
9144 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
9159 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
9162 ; GFX7-NEXT: s_waitcnt vmcnt(0)
9163 ; GFX7-NEXT: flat_store_dword v[0:1], v2
9167 ; GFX10-WGP: ; %bb.0: ; %entry
9168 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
9169 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
9170 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
9171 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
9172 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
9178 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
9183 ; GFX10-CU: ; %bb.0: ; %entry
9184 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
9185 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
9186 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
9187 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
9188 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
9194 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
9199 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
9201 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
9202 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
9203 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
9204 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
9207 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
9217 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
9219 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
9220 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
9224 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
9225 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9226 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
9227 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
9228 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
9229 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9235 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
9240 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
9241 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9242 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
9243 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
9244 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
9245 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9251 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
9256 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
9257 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9258 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
9259 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
9260 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
9261 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9266 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
9267 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
9268 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
9272 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
9273 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9274 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
9275 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
9276 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
9277 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9282 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
9283 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
9284 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
9288 ; GFX11-WGP: ; %bb.0: ; %entry
9289 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
9290 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
9291 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
9292 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
9293 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
9298 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9299 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
9300 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
9304 ; GFX11-CU: ; %bb.0: ; %entry
9305 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
9306 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
9307 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
9308 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
9309 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
9314 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9315 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
9316 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
9320 ; GFX12-WGP: ; %bb.0: ; %entry
9321 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
9322 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
9323 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
9324 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
9325 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
9330 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
9331 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
9332 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
9336 ; GFX12-CU: ; %bb.0: ; %entry
9337 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
9338 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
9339 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
9340 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
9341 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
9346 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
9347 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
9348 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
9354 %val0 = extractvalue { i32, i1 } %val, 0
9361 ; GFX6: ; %bb.0: ; %entry
9363 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
9364 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
9365 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
9366 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
9369 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
9379 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
9381 ; GFX6-NEXT: s_waitcnt vmcnt(0)
9382 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
9386 ; GFX7: ; %bb.0: ; %entry
9388 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
9389 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
9390 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
9392 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
9407 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
9410 ; GFX7-NEXT: s_waitcnt vmcnt(0)
9411 ; GFX7-NEXT: flat_store_dword v[0:1], v2
9415 ; GFX10-WGP: ; %bb.0: ; %entry
9416 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
9417 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
9418 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
9419 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
9420 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
9426 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
9431 ; GFX10-CU: ; %bb.0: ; %entry
9432 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
9433 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
9434 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
9435 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
9436 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
9442 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
9447 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
9449 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
9450 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
9451 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
9452 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
9455 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
9465 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
9467 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
9468 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
9472 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
9473 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9474 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
9475 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
9476 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
9477 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9483 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
9488 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
9489 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9490 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
9491 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
9492 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
9493 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9499 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
9504 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
9505 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9506 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
9507 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
9508 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
9509 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9514 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
9515 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
9516 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
9520 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
9521 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9522 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
9523 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
9524 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
9525 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9530 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
9531 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
9532 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
9536 ; GFX11-WGP: ; %bb.0: ; %entry
9537 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
9538 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
9539 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
9540 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
9541 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
9546 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9547 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
9548 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
9552 ; GFX11-CU: ; %bb.0: ; %entry
9553 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
9554 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
9555 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
9556 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
9557 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
9562 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9563 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
9564 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
9568 ; GFX12-WGP: ; %bb.0: ; %entry
9569 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
9570 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
9571 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
9572 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
9573 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
9578 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
9579 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
9580 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
9584 ; GFX12-CU: ; %bb.0: ; %entry
9585 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
9586 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
9587 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
9588 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
9589 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
9594 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
9595 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
9596 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
9602 %val0 = extractvalue { i32, i1 } %val, 0
9609 ; GFX6: ; %bb.0: ; %entry
9611 ; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
9612 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
9613 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
9616 ; GFX6-NEXT: s_mov_b32 s12, 0x100f000
9628 ; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0
9629 ; GFX6-NEXT: s_waitcnt vmcnt(0)
9630 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
9634 ; GFX7: ; %bb.0: ; %entry
9635 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
9636 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2
9637 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
9640 ; GFX7-NEXT: flat_load_dword v2, v[0:1]
9643 ; GFX7-NEXT: s_waitcnt vmcnt(0)
9644 ; GFX7-NEXT: flat_store_dword v[0:1], v2
9648 ; GFX10-WGP: ; %bb.0: ; %entry
9649 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
9650 ; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
9651 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
9652 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
9654 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
9659 ; GFX10-CU: ; %bb.0: ; %entry
9660 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
9661 ; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
9662 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
9663 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
9665 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
9670 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
9671 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
9672 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
9673 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
9674 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
9677 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000
9689 ; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0
9690 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
9691 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
9695 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
9696 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9697 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
9698 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
9699 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9701 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
9706 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
9707 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9708 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
9709 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
9710 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9712 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
9717 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
9718 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9719 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
9720 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
9721 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9723 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
9724 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
9728 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
9729 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9730 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
9731 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
9732 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9734 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
9735 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
9739 ; GFX11-WGP: ; %bb.0: ; %entry
9740 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
9741 ; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
9742 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
9743 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
9745 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
9746 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
9750 ; GFX11-CU: ; %bb.0: ; %entry
9751 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
9752 ; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
9753 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
9754 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
9756 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
9757 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
9761 ; GFX12-WGP: ; %bb.0: ; %entry
9762 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
9763 ; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
9764 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
9765 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
9767 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
9768 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
9772 ; GFX12-CU: ; %bb.0: ; %entry
9773 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
9774 ; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
9775 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
9776 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
9778 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
9779 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
9790 ; GFX6: ; %bb.0: ; %entry
9792 ; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
9793 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
9794 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
9797 ; GFX6-NEXT: s_mov_b32 s12, 0x100f000
9809 ; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0
9810 ; GFX6-NEXT: s_waitcnt vmcnt(0)
9811 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
9815 ; GFX7: ; %bb.0: ; %entry
9816 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
9817 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2
9818 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
9821 ; GFX7-NEXT: flat_load_dword v2, v[0:1]
9824 ; GFX7-NEXT: s_waitcnt vmcnt(0)
9825 ; GFX7-NEXT: flat_store_dword v[0:1], v2
9829 ; GFX10-WGP: ; %bb.0: ; %entry
9830 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
9831 ; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
9832 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
9833 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
9835 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
9840 ; GFX10-CU: ; %bb.0: ; %entry
9841 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
9842 ; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
9843 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
9844 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
9846 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
9851 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
9852 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
9853 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
9854 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
9855 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
9858 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000
9870 ; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0
9871 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
9872 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
9876 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
9877 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9878 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
9879 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
9880 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9882 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
9887 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
9888 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9889 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
9890 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
9891 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9893 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
9898 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
9899 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9900 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
9901 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
9902 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9904 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
9905 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
9909 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
9910 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
9911 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
9912 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
9913 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
9915 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
9916 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
9920 ; GFX11-WGP: ; %bb.0: ; %entry
9921 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
9922 ; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
9923 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
9924 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
9926 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
9927 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
9931 ; GFX11-CU: ; %bb.0: ; %entry
9932 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
9933 ; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
9934 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
9935 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
9937 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
9938 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
9942 ; GFX12-WGP: ; %bb.0: ; %entry
9943 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
9944 ; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
9945 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
9946 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
9948 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
9949 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
9953 ; GFX12-CU: ; %bb.0: ; %entry
9954 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
9955 ; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
9956 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
9957 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
9959 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
9960 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
9971 ; GFX6: ; %bb.0: ; %entry
9973 ; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
9974 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
9975 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
9978 ; GFX6-NEXT: s_mov_b32 s12, 0x100f000
9990 ; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0
9991 ; GFX6-NEXT: s_waitcnt vmcnt(0)
9992 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
9996 ; GFX7: ; %bb.0: ; %entry
9997 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
9998 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2
9999 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
10002 ; GFX7-NEXT: flat_load_dword v2, v[0:1]
10005 ; GFX7-NEXT: s_waitcnt vmcnt(0)
10006 ; GFX7-NEXT: flat_store_dword v[0:1], v2
10010 ; GFX10-WGP: ; %bb.0: ; %entry
10011 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
10012 ; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
10013 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10014 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
10016 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
10021 ; GFX10-CU: ; %bb.0: ; %entry
10022 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
10023 ; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
10024 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10025 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
10027 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
10032 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
10033 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
10034 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
10035 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
10036 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
10039 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000
10051 ; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0
10052 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
10053 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
10057 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
10058 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10059 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
10060 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10061 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10063 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
10068 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
10069 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10070 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
10071 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10072 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10074 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
10079 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
10080 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10081 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
10082 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
10083 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10085 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
10086 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
10090 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
10091 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10092 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
10093 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
10094 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10096 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
10097 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
10101 ; GFX11-WGP: ; %bb.0: ; %entry
10102 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
10103 ; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
10104 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10105 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
10107 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
10108 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
10112 ; GFX11-CU: ; %bb.0: ; %entry
10113 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
10114 ; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
10115 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10116 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
10118 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
10119 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
10123 ; GFX12-WGP: ; %bb.0: ; %entry
10124 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
10125 ; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
10126 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10127 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
10129 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
10130 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
10134 ; GFX12-CU: ; %bb.0: ; %entry
10135 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
10136 ; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
10137 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10138 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
10140 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
10141 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
10152 ; GFX6: ; %bb.0: ; %entry
10154 ; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
10155 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
10156 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
10159 ; GFX6-NEXT: s_mov_b32 s12, 0x100f000
10171 ; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0
10172 ; GFX6-NEXT: s_waitcnt vmcnt(0)
10173 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
10177 ; GFX7: ; %bb.0: ; %entry
10178 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
10179 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2
10180 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
10183 ; GFX7-NEXT: flat_load_dword v2, v[0:1]
10186 ; GFX7-NEXT: s_waitcnt vmcnt(0)
10187 ; GFX7-NEXT: flat_store_dword v[0:1], v2
10191 ; GFX10-WGP: ; %bb.0: ; %entry
10192 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
10193 ; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
10194 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10195 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
10197 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
10202 ; GFX10-CU: ; %bb.0: ; %entry
10203 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
10204 ; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
10205 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10206 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
10208 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
10213 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
10214 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
10215 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
10216 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
10217 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
10220 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000
10232 ; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0
10233 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
10234 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
10238 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
10239 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10240 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
10241 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10242 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10244 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
10249 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
10250 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10251 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
10252 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10253 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10255 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
10260 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
10261 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10262 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
10263 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
10264 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10266 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
10267 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
10271 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
10272 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10273 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
10274 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
10275 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10277 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
10278 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
10282 ; GFX11-WGP: ; %bb.0: ; %entry
10283 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
10284 ; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
10285 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10286 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
10288 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
10289 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
10293 ; GFX11-CU: ; %bb.0: ; %entry
10294 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
10295 ; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
10296 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10297 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
10299 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
10300 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
10304 ; GFX12-WGP: ; %bb.0: ; %entry
10305 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
10306 ; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
10307 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10308 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
10310 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
10311 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
10315 ; GFX12-CU: ; %bb.0: ; %entry
10316 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
10317 ; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
10318 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10319 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
10321 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
10322 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
10333 ; GFX6: ; %bb.0: ; %entry
10335 ; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0
10336 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
10337 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
10340 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
10347 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
10351 ; GFX7: ; %bb.0: ; %entry
10352 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
10353 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2
10354 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
10358 ; GFX7-NEXT: flat_store_dword v[0:1], v2
10362 ; GFX10-WGP: ; %bb.0: ; %entry
10363 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
10364 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10365 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
10366 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
10372 ; GFX10-CU: ; %bb.0: ; %entry
10373 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
10374 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10375 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
10376 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
10382 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
10383 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
10384 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0
10385 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
10386 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
10389 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
10396 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
10400 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
10401 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
10402 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10403 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10404 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10410 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
10411 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
10412 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10413 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10414 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10420 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
10421 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
10422 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
10423 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10424 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10426 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
10430 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
10431 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
10432 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
10433 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10434 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10436 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
10440 ; GFX11-WGP: ; %bb.0: ; %entry
10441 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
10442 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10443 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
10444 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
10446 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
10450 ; GFX11-CU: ; %bb.0: ; %entry
10451 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
10452 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10453 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
10454 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
10456 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
10460 ; GFX12-WGP: ; %bb.0: ; %entry
10461 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
10462 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10463 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
10464 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
10466 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
10470 ; GFX12-CU: ; %bb.0: ; %entry
10471 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
10472 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10473 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
10474 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
10476 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
10486 ; GFX6: ; %bb.0: ; %entry
10488 ; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0
10489 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
10490 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
10493 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
10500 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
10504 ; GFX7: ; %bb.0: ; %entry
10505 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
10506 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2
10507 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
10511 ; GFX7-NEXT: flat_store_dword v[0:1], v2
10515 ; GFX10-WGP: ; %bb.0: ; %entry
10516 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
10517 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10518 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
10519 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
10525 ; GFX10-CU: ; %bb.0: ; %entry
10526 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
10527 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10528 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
10529 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
10535 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
10536 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
10537 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0
10538 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
10539 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
10542 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
10549 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
10553 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
10554 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
10555 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10556 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10557 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10563 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
10564 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
10565 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10566 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10567 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10573 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
10574 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
10575 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
10576 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10577 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10579 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
10583 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
10584 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
10585 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
10586 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10587 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10589 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
10593 ; GFX11-WGP: ; %bb.0: ; %entry
10594 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
10595 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10596 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
10597 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
10599 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
10603 ; GFX11-CU: ; %bb.0: ; %entry
10604 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
10605 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10606 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
10607 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
10609 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
10613 ; GFX12-WGP: ; %bb.0: ; %entry
10614 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
10615 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10616 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
10617 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
10619 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
10623 ; GFX12-CU: ; %bb.0: ; %entry
10624 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
10625 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10626 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
10627 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
10629 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
10639 ; GFX6: ; %bb.0: ; %entry
10641 ; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0
10642 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
10643 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
10646 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
10653 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
10657 ; GFX7: ; %bb.0: ; %entry
10658 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
10659 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2
10660 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
10664 ; GFX7-NEXT: flat_store_dword v[0:1], v2
10668 ; GFX10-WGP: ; %bb.0: ; %entry
10669 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
10670 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10671 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
10672 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
10678 ; GFX10-CU: ; %bb.0: ; %entry
10679 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
10680 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10681 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
10682 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
10688 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
10689 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
10690 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0
10691 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
10692 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
10695 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
10702 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
10706 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
10707 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
10708 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10709 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10710 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10716 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
10717 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
10718 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10719 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10720 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10726 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
10727 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
10728 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
10729 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10730 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10732 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
10736 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
10737 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
10738 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
10739 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10740 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10742 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
10746 ; GFX11-WGP: ; %bb.0: ; %entry
10747 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
10748 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10749 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
10750 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
10752 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
10756 ; GFX11-CU: ; %bb.0: ; %entry
10757 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
10758 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10759 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
10760 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
10762 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
10766 ; GFX12-WGP: ; %bb.0: ; %entry
10767 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
10768 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10769 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
10770 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
10772 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
10776 ; GFX12-CU: ; %bb.0: ; %entry
10777 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
10778 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10779 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
10780 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
10782 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
10792 ; GFX6: ; %bb.0: ; %entry
10794 ; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0
10795 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
10796 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
10799 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
10806 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
10810 ; GFX7: ; %bb.0: ; %entry
10811 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
10812 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2
10813 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
10817 ; GFX7-NEXT: flat_store_dword v[0:1], v2
10821 ; GFX10-WGP: ; %bb.0: ; %entry
10822 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
10823 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10824 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
10825 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
10831 ; GFX10-CU: ; %bb.0: ; %entry
10832 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
10833 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10834 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
10835 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
10841 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
10842 ; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
10843 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0
10844 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
10845 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
10848 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
10855 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
10859 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
10860 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
10861 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10862 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10863 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10869 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
10870 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
10871 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
10872 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10873 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10879 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
10880 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
10881 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
10882 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10883 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10885 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
10889 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
10890 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
10891 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
10892 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10893 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
10895 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
10899 ; GFX11-WGP: ; %bb.0: ; %entry
10900 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
10901 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10902 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
10903 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
10905 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
10909 ; GFX11-CU: ; %bb.0: ; %entry
10910 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
10911 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10912 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
10913 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
10915 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
10919 ; GFX12-WGP: ; %bb.0: ; %entry
10920 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
10921 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10922 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
10923 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
10925 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
10929 ; GFX12-CU: ; %bb.0: ; %entry
10930 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
10931 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10932 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
10933 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
10935 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
10945 ; GFX6: ; %bb.0: ; %entry
10946 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
10947 ; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2
10948 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
10951 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
10958 ; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
10962 ; GFX7: ; %bb.0: ; %entry
10963 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
10964 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2
10965 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
10969 ; GFX7-NEXT: flat_atomic_swap v[0:1], v2
10973 ; GFX10-WGP: ; %bb.0: ; %entry
10974 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
10975 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
10976 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
10977 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
10983 ; GFX10-CU: ; %bb.0: ; %entry
10984 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
10985 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
10986 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
10987 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
10993 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
10994 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
10995 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2
10996 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
10999 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
11006 ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
11010 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
11011 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11012 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11013 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
11014 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11020 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
11021 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11022 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11023 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
11024 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11030 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
11031 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11032 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11033 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
11034 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11036 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
11040 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
11041 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11042 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11043 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
11044 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11046 ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
11050 ; GFX11-WGP: ; %bb.0: ; %entry
11051 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
11052 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11053 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
11054 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
11056 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11060 ; GFX11-CU: ; %bb.0: ; %entry
11061 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
11062 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11063 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
11064 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
11066 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11070 ; GFX12-WGP: ; %bb.0: ; %entry
11071 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
11072 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11073 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
11074 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
11076 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11080 ; GFX12-CU: ; %bb.0: ; %entry
11081 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
11082 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11083 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
11084 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
11086 ; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11096 ; GFX6: ; %bb.0: ; %entry
11097 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11098 ; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2
11099 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
11102 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
11109 ; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
11113 ; GFX7: ; %bb.0: ; %entry
11114 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
11115 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2
11116 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
11120 ; GFX7-NEXT: flat_atomic_swap v[0:1], v2
11124 ; GFX10-WGP: ; %bb.0: ; %entry
11125 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
11126 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11127 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
11128 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
11134 ; GFX10-CU: ; %bb.0: ; %entry
11135 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
11136 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11137 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
11138 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
11144 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
11145 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11146 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2
11147 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
11150 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
11157 ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
11161 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
11162 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11163 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11164 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
11165 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11171 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
11172 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11173 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11174 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
11175 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11181 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
11182 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11183 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11184 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
11185 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11187 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
11191 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
11192 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11193 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11194 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
11195 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11197 ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
11201 ; GFX11-WGP: ; %bb.0: ; %entry
11202 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
11203 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11204 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
11205 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
11207 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11211 ; GFX11-CU: ; %bb.0: ; %entry
11212 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
11213 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11214 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
11215 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
11217 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11221 ; GFX12-WGP: ; %bb.0: ; %entry
11222 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
11223 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11224 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
11225 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
11227 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11231 ; GFX12-CU: ; %bb.0: ; %entry
11232 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
11233 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11234 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
11235 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
11237 ; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11247 ; GFX6: ; %bb.0: ; %entry
11248 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11249 ; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2
11250 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
11253 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
11260 ; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
11264 ; GFX7: ; %bb.0: ; %entry
11265 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
11266 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2
11267 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
11271 ; GFX7-NEXT: flat_atomic_swap v[0:1], v2
11275 ; GFX10-WGP: ; %bb.0: ; %entry
11276 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
11277 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11278 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
11279 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
11285 ; GFX10-CU: ; %bb.0: ; %entry
11286 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
11287 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11288 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
11289 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
11295 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
11296 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11297 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2
11298 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
11301 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
11308 ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
11312 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
11313 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11314 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11315 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
11316 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11322 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
11323 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11324 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11325 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
11326 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11332 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
11333 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11334 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11335 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
11336 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11338 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
11342 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
11343 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11344 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11345 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
11346 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11348 ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
11352 ; GFX11-WGP: ; %bb.0: ; %entry
11353 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
11354 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11355 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
11356 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
11358 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11362 ; GFX11-CU: ; %bb.0: ; %entry
11363 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
11364 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11365 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
11366 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
11368 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11372 ; GFX12-WGP: ; %bb.0: ; %entry
11373 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
11374 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11375 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
11376 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
11378 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11382 ; GFX12-CU: ; %bb.0: ; %entry
11383 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
11384 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11385 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
11386 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
11388 ; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11398 ; GFX6: ; %bb.0: ; %entry
11399 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11400 ; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2
11401 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
11404 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
11411 ; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
11415 ; GFX7: ; %bb.0: ; %entry
11416 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
11417 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2
11418 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
11422 ; GFX7-NEXT: flat_atomic_swap v[0:1], v2
11426 ; GFX10-WGP: ; %bb.0: ; %entry
11427 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
11428 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11429 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
11430 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
11436 ; GFX10-CU: ; %bb.0: ; %entry
11437 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
11438 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11439 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
11440 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
11446 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
11447 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11448 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2
11449 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
11452 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
11459 ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
11463 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
11464 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11465 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11466 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
11467 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11473 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
11474 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11475 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11476 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
11477 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11483 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
11484 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11485 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11486 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
11487 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11489 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
11493 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
11494 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11495 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11496 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
11497 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11499 ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
11503 ; GFX11-WGP: ; %bb.0: ; %entry
11504 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
11505 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11506 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
11507 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
11509 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11513 ; GFX11-CU: ; %bb.0: ; %entry
11514 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
11515 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11516 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
11517 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
11519 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11523 ; GFX12-WGP: ; %bb.0: ; %entry
11524 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
11525 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11526 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
11527 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
11529 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11533 ; GFX12-CU: ; %bb.0: ; %entry
11534 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
11535 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11536 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
11537 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
11539 ; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11549 ; GFX6: ; %bb.0: ; %entry
11550 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11551 ; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2
11552 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
11555 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
11562 ; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
11566 ; GFX7: ; %bb.0: ; %entry
11567 ; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
11568 ; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2
11569 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
11573 ; GFX7-NEXT: flat_atomic_swap v[0:1], v2
11577 ; GFX10-WGP: ; %bb.0: ; %entry
11578 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
11579 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11580 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
11581 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
11587 ; GFX10-CU: ; %bb.0: ; %entry
11588 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
11589 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11590 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
11591 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
11597 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
11598 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11599 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2
11600 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
11603 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
11610 ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
11614 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
11615 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11616 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11617 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
11618 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11624 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
11625 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11626 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11627 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
11628 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11634 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
11635 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11636 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11637 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
11638 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11640 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
11644 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
11645 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11646 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11647 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
11648 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11650 ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
11654 ; GFX11-WGP: ; %bb.0: ; %entry
11655 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
11656 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11657 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
11658 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
11660 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11664 ; GFX11-CU: ; %bb.0: ; %entry
11665 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
11666 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11667 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
11668 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
11670 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11674 ; GFX12-WGP: ; %bb.0: ; %entry
11675 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
11676 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11677 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
11678 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
11680 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11684 ; GFX12-CU: ; %bb.0: ; %entry
11685 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
11686 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11687 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
11688 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
11690 ; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
11700 ; GFX6: ; %bb.0: ; %entry
11701 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11702 ; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2
11703 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
11706 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
11713 ; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
11714 ; GFX6-NEXT: s_waitcnt vmcnt(0)
11715 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
11719 ; GFX7: ; %bb.0: ; %entry
11720 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11721 ; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
11722 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
11726 ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
11729 ; GFX7-NEXT: s_waitcnt vmcnt(0)
11730 ; GFX7-NEXT: flat_store_dword v[0:1], v2
11734 ; GFX10-WGP: ; %bb.0: ; %entry
11735 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
11736 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11737 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
11738 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
11741 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
11746 ; GFX10-CU: ; %bb.0: ; %entry
11747 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
11748 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11749 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
11750 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
11753 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
11758 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
11759 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11760 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2
11761 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
11764 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
11771 ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
11772 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
11773 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
11777 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
11778 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11779 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11780 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
11781 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11784 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
11789 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
11790 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11791 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11792 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
11793 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11796 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
11801 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
11802 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11803 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11804 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
11805 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11807 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0
11808 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
11809 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
11813 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
11814 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11815 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11816 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
11817 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11819 ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0
11820 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
11821 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
11825 ; GFX11-WGP: ; %bb.0: ; %entry
11826 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
11827 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11828 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
11829 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
11831 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
11832 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
11833 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
11837 ; GFX11-CU: ; %bb.0: ; %entry
11838 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
11839 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11840 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
11841 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
11843 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
11844 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
11845 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
11849 ; GFX12-WGP: ; %bb.0: ; %entry
11850 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
11851 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11852 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
11853 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
11855 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
11856 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
11857 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
11861 ; GFX12-CU: ; %bb.0: ; %entry
11862 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
11863 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
11864 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
11865 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
11867 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
11868 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
11869 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
11880 ; GFX6: ; %bb.0: ; %entry
11881 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11882 ; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2
11883 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
11886 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
11893 ; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
11894 ; GFX6-NEXT: s_waitcnt vmcnt(0)
11895 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
11899 ; GFX7: ; %bb.0: ; %entry
11900 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11901 ; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
11902 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
11906 ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
11909 ; GFX7-NEXT: s_waitcnt vmcnt(0)
11910 ; GFX7-NEXT: flat_store_dword v[0:1], v2
11914 ; GFX10-WGP: ; %bb.0: ; %entry
11915 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
11916 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11917 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
11918 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
11921 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
11926 ; GFX10-CU: ; %bb.0: ; %entry
11927 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
11928 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11929 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
11930 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
11933 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
11938 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
11939 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11940 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2
11941 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
11944 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
11951 ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
11952 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
11953 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
11957 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
11958 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11959 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11960 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
11961 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11964 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
11969 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
11970 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11971 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
11972 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
11973 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11976 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
11981 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
11982 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11983 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11984 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
11985 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11987 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0
11988 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
11989 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
11993 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
11994 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
11995 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11996 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
11997 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
11999 ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0
12000 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
12001 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
12005 ; GFX11-WGP: ; %bb.0: ; %entry
12006 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
12007 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12008 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
12009 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
12011 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
12012 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
12013 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
12017 ; GFX11-CU: ; %bb.0: ; %entry
12018 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
12019 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12020 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
12021 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
12023 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
12024 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
12025 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
12029 ; GFX12-WGP: ; %bb.0: ; %entry
12030 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
12031 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12032 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
12033 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
12035 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
12036 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
12037 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
12041 ; GFX12-CU: ; %bb.0: ; %entry
12042 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
12043 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12044 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
12045 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
12047 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
12048 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
12049 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
12060 ; GFX6: ; %bb.0: ; %entry
12061 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12062 ; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2
12063 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
12066 ; GFX6-NEXT: s_mov_b32 s9, 0x100f000
12073 ; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
12074 ; GFX6-NEXT: s_waitcnt vmcnt(0)
12075 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
12079 ; GFX7: ; %bb.0: ; %entry
12080 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12081 ; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
12082 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
12086 ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
12089 ; GFX7-NEXT: s_waitcnt vmcnt(0)
12090 ; GFX7-NEXT: flat_store_dword v[0:1], v2
12094 ; GFX10-WGP: ; %bb.0: ; %entry
12095 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
12096 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12097 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
12098 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
12101 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
12106 ; GFX10-CU: ; %bb.0: ; %entry
12107 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
12108 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12109 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
12110 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
12113 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
12118 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
12119 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
12120 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2
12121 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
12124 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
12131 ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
12132 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
12133 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
12137 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
12138 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
12139 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12140 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
12141 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
12144 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
12149 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
12150 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
12151 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12152 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
12153 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
12156 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
12161 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
12162 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
12163 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
12164 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
12165 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
12167 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0
12168 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
12169 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
12173 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
12174 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
12175 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
12176 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
12177 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
12179 ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0
12180 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
12181 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
12185 ; GFX11-WGP: ; %bb.0: ; %entry
12186 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
12187 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12188 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
12189 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
12191 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
12192 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
12193 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
12197 ; GFX11-CU: ; %bb.0: ; %entry
12198 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
12199 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12200 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
12201 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
12203 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
12204 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
12205 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
12209 ; GFX12-WGP: ; %bb.0: ; %entry
12210 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
12211 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12212 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
12213 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
12215 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
12216 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
12217 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
12221 ; GFX12-CU: ; %bb.0: ; %entry
12222 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
12223 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12224 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
12225 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
12227 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
12228 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
12229 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
12240 ; GFX6: ; %bb.0: ; %entry
12242 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
12243 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
12244 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
12245 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
12248 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
12258 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
12262 ; GFX7: ; %bb.0: ; %entry
12264 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
12265 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
12266 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
12268 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
12283 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
12287 ; GFX10-WGP: ; %bb.0: ; %entry
12288 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
12289 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12290 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
12291 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
12292 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
12301 ; GFX10-CU: ; %bb.0: ; %entry
12302 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
12303 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12304 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
12305 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
12306 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
12315 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
12317 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
12318 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
12319 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
12320 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
12323 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
12333 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
12337 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
12338 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
12339 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12340 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
12341 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
12342 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
12351 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
12352 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
12353 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12354 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
12355 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
12356 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
12365 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
12366 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
12367 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
12368 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
12369 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
12370 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
12375 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
12379 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
12380 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
12381 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
12382 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
12383 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
12384 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
12389 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
12393 ; GFX11-WGP: ; %bb.0: ; %entry
12394 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
12395 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12396 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
12397 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
12398 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
12403 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
12407 ; GFX11-CU: ; %bb.0: ; %entry
12408 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
12409 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12410 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
12411 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
12412 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
12417 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
12421 ; GFX12-WGP: ; %bb.0: ; %entry
12422 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
12423 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12424 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
12425 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
12426 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
12431 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
12435 ; GFX12-CU: ; %bb.0: ; %entry
12436 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
12437 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12438 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
12439 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
12440 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
12445 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
12456 ; GFX6: ; %bb.0: ; %entry
12458 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
12459 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
12460 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
12461 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
12464 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
12474 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
12478 ; GFX7: ; %bb.0: ; %entry
12480 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
12481 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
12482 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
12484 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
12499 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
12503 ; GFX10-WGP: ; %bb.0: ; %entry
12504 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
12505 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12506 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
12507 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
12508 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
12517 ; GFX10-CU: ; %bb.0: ; %entry
12518 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
12519 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12520 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
12521 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
12522 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
12531 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
12533 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
12534 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
12535 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
12536 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
12539 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
12549 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
12553 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
12554 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
12555 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12556 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
12557 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
12558 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
12567 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
12568 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
12569 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12570 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
12571 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
12572 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
12581 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
12582 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
12583 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
12584 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
12585 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
12586 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
12591 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
12595 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
12596 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
12597 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
12598 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
12599 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
12600 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
12605 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
12609 ; GFX11-WGP: ; %bb.0: ; %entry
12610 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
12611 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12612 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
12613 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
12614 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
12619 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
12623 ; GFX11-CU: ; %bb.0: ; %entry
12624 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
12625 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12626 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
12627 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
12628 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
12633 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
12637 ; GFX12-WGP: ; %bb.0: ; %entry
12638 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
12639 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12640 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
12641 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
12642 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
12647 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
12651 ; GFX12-CU: ; %bb.0: ; %entry
12652 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
12653 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12654 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
12655 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
12656 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
12661 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
12672 ; GFX6: ; %bb.0: ; %entry
12674 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
12675 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
12676 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
12677 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
12680 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
12690 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
12694 ; GFX7: ; %bb.0: ; %entry
12696 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
12697 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
12698 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
12700 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
12715 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
12719 ; GFX10-WGP: ; %bb.0: ; %entry
12720 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
12721 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12722 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
12723 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
12724 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
12733 ; GFX10-CU: ; %bb.0: ; %entry
12734 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
12735 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12736 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
12737 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
12738 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
12747 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
12749 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
12750 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
12751 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
12752 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
12755 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
12765 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
12769 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
12770 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
12771 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12772 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
12773 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
12774 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
12783 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
12784 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
12785 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12786 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
12787 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
12788 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
12797 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
12798 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
12799 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
12800 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
12801 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
12802 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
12807 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
12811 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
12812 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
12813 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
12814 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
12815 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
12816 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
12821 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
12825 ; GFX11-WGP: ; %bb.0: ; %entry
12826 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
12827 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12828 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
12829 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
12830 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
12835 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
12839 ; GFX11-CU: ; %bb.0: ; %entry
12840 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
12841 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12842 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
12843 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
12844 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
12849 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
12853 ; GFX12-WGP: ; %bb.0: ; %entry
12854 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
12855 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12856 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
12857 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
12858 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
12863 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
12867 ; GFX12-CU: ; %bb.0: ; %entry
12868 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
12869 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
12870 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
12871 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
12872 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
12877 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
12888 ; GFX6: ; %bb.0: ; %entry
12890 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
12891 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
12892 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
12893 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
12896 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
12906 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
12910 ; GFX7: ; %bb.0: ; %entry
12912 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
12913 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
12914 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
12916 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
12931 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
12935 ; GFX10-WGP: ; %bb.0: ; %entry
12936 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
12937 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12938 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
12939 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
12940 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
12949 ; GFX10-CU: ; %bb.0: ; %entry
12950 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
12951 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12952 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
12953 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
12954 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
12963 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
12965 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
12966 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
12967 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
12968 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
12971 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
12981 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
12985 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
12986 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
12987 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12988 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
12989 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
12990 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
12999 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
13000 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13001 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
13002 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
13003 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
13004 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13013 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
13014 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13015 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
13016 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
13017 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
13018 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13023 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
13027 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
13028 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13029 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
13030 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
13031 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
13032 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13037 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
13041 ; GFX11-WGP: ; %bb.0: ; %entry
13042 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
13043 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13044 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
13045 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
13046 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
13051 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13055 ; GFX11-CU: ; %bb.0: ; %entry
13056 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
13057 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13058 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
13059 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
13060 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
13065 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13069 ; GFX12-WGP: ; %bb.0: ; %entry
13070 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
13071 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13072 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
13073 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
13074 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
13079 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13083 ; GFX12-CU: ; %bb.0: ; %entry
13084 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
13085 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13086 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
13087 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
13088 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
13093 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13104 ; GFX6: ; %bb.0: ; %entry
13106 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
13107 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
13108 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
13109 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
13112 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
13122 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
13126 ; GFX7: ; %bb.0: ; %entry
13128 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
13129 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
13130 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
13132 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
13147 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
13151 ; GFX10-WGP: ; %bb.0: ; %entry
13152 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
13153 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
13154 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
13155 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
13156 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
13165 ; GFX10-CU: ; %bb.0: ; %entry
13166 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
13167 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
13168 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
13169 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
13170 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
13179 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
13181 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
13182 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
13183 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
13184 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
13187 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
13197 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
13201 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
13202 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13203 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
13204 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
13205 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
13206 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13215 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
13216 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13217 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
13218 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
13219 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
13220 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13229 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
13230 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13231 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
13232 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
13233 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
13234 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13239 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
13243 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
13244 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13245 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
13246 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
13247 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
13248 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13253 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
13257 ; GFX11-WGP: ; %bb.0: ; %entry
13258 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
13259 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13260 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
13261 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
13262 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
13267 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13271 ; GFX11-CU: ; %bb.0: ; %entry
13272 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
13273 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13274 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
13275 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
13276 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
13281 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13285 ; GFX12-WGP: ; %bb.0: ; %entry
13286 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
13287 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13288 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
13289 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
13290 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
13295 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13299 ; GFX12-CU: ; %bb.0: ; %entry
13300 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
13301 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13302 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
13303 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
13304 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
13309 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13320 ; GFX6: ; %bb.0: ; %entry
13322 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
13323 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
13324 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
13325 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
13328 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
13338 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
13342 ; GFX7: ; %bb.0: ; %entry
13344 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
13345 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
13346 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
13348 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
13363 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
13367 ; GFX10-WGP: ; %bb.0: ; %entry
13368 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
13369 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
13370 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
13371 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
13372 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
13381 ; GFX10-CU: ; %bb.0: ; %entry
13382 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
13383 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
13384 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
13385 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
13386 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
13395 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
13397 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
13398 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
13399 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
13400 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
13403 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
13413 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
13417 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
13418 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13419 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
13420 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
13421 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
13422 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13431 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
13432 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13433 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
13434 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
13435 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
13436 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13445 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
13446 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13447 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
13448 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
13449 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
13450 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13455 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
13459 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
13460 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13461 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
13462 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
13463 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
13464 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13469 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
13473 ; GFX11-WGP: ; %bb.0: ; %entry
13474 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
13475 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13476 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
13477 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
13478 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
13483 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13487 ; GFX11-CU: ; %bb.0: ; %entry
13488 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
13489 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13490 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
13491 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
13492 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
13497 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13501 ; GFX12-WGP: ; %bb.0: ; %entry
13502 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
13503 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13504 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
13505 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
13506 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
13511 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13515 ; GFX12-CU: ; %bb.0: ; %entry
13516 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
13517 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13518 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
13519 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
13520 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
13525 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13536 ; GFX6: ; %bb.0: ; %entry
13538 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
13539 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
13540 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
13541 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
13544 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
13554 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
13558 ; GFX7: ; %bb.0: ; %entry
13560 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
13561 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
13562 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
13564 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
13579 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
13583 ; GFX10-WGP: ; %bb.0: ; %entry
13584 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
13585 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
13586 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
13587 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
13588 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
13597 ; GFX10-CU: ; %bb.0: ; %entry
13598 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
13599 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
13600 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
13601 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
13602 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
13611 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
13613 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
13614 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
13615 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
13616 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
13619 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
13629 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
13633 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
13634 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13635 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
13636 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
13637 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
13638 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13647 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
13648 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13649 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
13650 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
13651 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
13652 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13661 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
13662 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13663 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
13664 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
13665 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
13666 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13671 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
13675 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
13676 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13677 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
13678 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
13679 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
13680 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13685 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
13689 ; GFX11-WGP: ; %bb.0: ; %entry
13690 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
13691 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13692 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
13693 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
13694 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
13699 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13703 ; GFX11-CU: ; %bb.0: ; %entry
13704 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
13705 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13706 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
13707 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
13708 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
13713 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13717 ; GFX12-WGP: ; %bb.0: ; %entry
13718 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
13719 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13720 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
13721 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
13722 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
13727 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13731 ; GFX12-CU: ; %bb.0: ; %entry
13732 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
13733 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13734 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
13735 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
13736 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
13741 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13752 ; GFX6: ; %bb.0: ; %entry
13754 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
13755 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
13756 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
13757 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
13760 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
13770 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
13774 ; GFX7: ; %bb.0: ; %entry
13776 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
13777 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
13778 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
13780 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
13795 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
13799 ; GFX10-WGP: ; %bb.0: ; %entry
13800 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
13801 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
13802 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
13803 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
13804 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
13813 ; GFX10-CU: ; %bb.0: ; %entry
13814 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
13815 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
13816 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
13817 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
13818 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
13827 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
13829 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
13830 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
13831 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
13832 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
13835 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
13845 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
13849 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
13850 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13851 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
13852 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
13853 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
13854 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13863 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
13864 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13865 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
13866 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
13867 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
13868 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13877 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
13878 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13879 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
13880 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
13881 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
13882 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13887 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
13891 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
13892 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
13893 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
13894 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
13895 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
13896 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
13901 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
13905 ; GFX11-WGP: ; %bb.0: ; %entry
13906 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
13907 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13908 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
13909 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
13910 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
13915 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13919 ; GFX11-CU: ; %bb.0: ; %entry
13920 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
13921 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13922 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
13923 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
13924 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
13929 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13933 ; GFX12-WGP: ; %bb.0: ; %entry
13934 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
13935 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13936 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
13937 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
13938 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
13943 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13947 ; GFX12-CU: ; %bb.0: ; %entry
13948 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
13949 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13950 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
13951 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
13952 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
13957 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13968 ; GFX6: ; %bb.0: ; %entry
13970 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
13971 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
13972 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
13973 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
13976 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
13986 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
13990 ; GFX7: ; %bb.0: ; %entry
13992 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
13993 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
13994 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
13996 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
14011 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
14015 ; GFX10-WGP: ; %bb.0: ; %entry
14016 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
14017 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14018 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
14019 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
14020 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
14029 ; GFX10-CU: ; %bb.0: ; %entry
14030 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
14031 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14032 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
14033 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
14034 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
14043 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
14045 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
14046 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
14047 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
14048 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
14051 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
14061 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
14065 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
14066 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14067 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14068 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
14069 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
14070 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14079 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
14080 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14081 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14082 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
14083 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
14084 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14093 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
14094 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14095 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
14096 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
14097 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
14098 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14103 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
14107 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
14108 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14109 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
14110 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
14111 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
14112 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14117 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
14121 ; GFX11-WGP: ; %bb.0: ; %entry
14122 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
14123 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14124 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
14125 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
14126 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
14131 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14135 ; GFX11-CU: ; %bb.0: ; %entry
14136 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
14137 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14138 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
14139 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
14140 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
14145 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14149 ; GFX12-WGP: ; %bb.0: ; %entry
14150 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
14151 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14152 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
14153 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
14154 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
14159 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14163 ; GFX12-CU: ; %bb.0: ; %entry
14164 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
14165 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14166 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
14167 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
14168 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
14173 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14184 ; GFX6: ; %bb.0: ; %entry
14186 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
14187 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
14188 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
14189 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
14192 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
14202 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
14206 ; GFX7: ; %bb.0: ; %entry
14208 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
14209 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
14210 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
14212 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
14227 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
14231 ; GFX10-WGP: ; %bb.0: ; %entry
14232 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
14233 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14234 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
14235 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
14236 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
14245 ; GFX10-CU: ; %bb.0: ; %entry
14246 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
14247 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14248 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
14249 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
14250 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
14259 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
14261 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
14262 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
14263 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
14264 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
14267 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
14277 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
14281 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
14282 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14283 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14284 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
14285 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
14286 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14295 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
14296 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14297 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14298 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
14299 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
14300 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14309 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
14310 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14311 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
14312 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
14313 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
14314 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14319 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
14323 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
14324 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14325 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
14326 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
14327 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
14328 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14333 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
14337 ; GFX11-WGP: ; %bb.0: ; %entry
14338 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
14339 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14340 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
14341 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
14342 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
14347 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14351 ; GFX11-CU: ; %bb.0: ; %entry
14352 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
14353 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14354 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
14355 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
14356 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
14361 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14365 ; GFX12-WGP: ; %bb.0: ; %entry
14366 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
14367 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14368 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
14369 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
14370 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
14375 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14379 ; GFX12-CU: ; %bb.0: ; %entry
14380 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
14381 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14382 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
14383 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
14384 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
14389 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14400 ; GFX6: ; %bb.0: ; %entry
14402 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
14403 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
14404 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
14405 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
14408 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
14418 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
14422 ; GFX7: ; %bb.0: ; %entry
14424 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
14425 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
14426 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
14428 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
14443 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
14447 ; GFX10-WGP: ; %bb.0: ; %entry
14448 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
14449 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14450 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
14451 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
14452 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
14461 ; GFX10-CU: ; %bb.0: ; %entry
14462 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
14463 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14464 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
14465 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
14466 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
14475 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
14477 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
14478 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
14479 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
14480 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
14483 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
14493 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
14497 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
14498 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14499 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14500 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
14501 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
14502 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14511 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
14512 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14513 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14514 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
14515 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
14516 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14525 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
14526 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14527 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
14528 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
14529 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
14530 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14535 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
14539 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
14540 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14541 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
14542 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
14543 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
14544 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14549 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
14553 ; GFX11-WGP: ; %bb.0: ; %entry
14554 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
14555 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14556 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
14557 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
14558 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
14563 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14567 ; GFX11-CU: ; %bb.0: ; %entry
14568 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
14569 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14570 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
14571 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
14572 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
14577 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14581 ; GFX12-WGP: ; %bb.0: ; %entry
14582 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
14583 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14584 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
14585 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
14586 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
14591 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14595 ; GFX12-CU: ; %bb.0: ; %entry
14596 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
14597 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14598 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
14599 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
14600 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
14605 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14616 ; GFX6: ; %bb.0: ; %entry
14618 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
14619 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
14620 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
14621 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
14624 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
14634 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
14638 ; GFX7: ; %bb.0: ; %entry
14640 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
14641 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
14642 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
14644 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
14659 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
14663 ; GFX10-WGP: ; %bb.0: ; %entry
14664 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
14665 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14666 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
14667 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
14668 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
14677 ; GFX10-CU: ; %bb.0: ; %entry
14678 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
14679 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14680 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
14681 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
14682 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
14691 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
14693 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
14694 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
14695 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
14696 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
14699 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
14709 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
14713 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
14714 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14715 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14716 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
14717 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
14718 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14727 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
14728 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14729 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14730 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
14731 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
14732 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14741 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
14742 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14743 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
14744 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
14745 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
14746 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14751 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
14755 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
14756 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14757 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
14758 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
14759 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
14760 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14765 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
14769 ; GFX11-WGP: ; %bb.0: ; %entry
14770 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
14771 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14772 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
14773 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
14774 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
14779 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14783 ; GFX11-CU: ; %bb.0: ; %entry
14784 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
14785 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14786 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
14787 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
14788 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
14793 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14797 ; GFX12-WGP: ; %bb.0: ; %entry
14798 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
14799 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14800 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
14801 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
14802 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
14807 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14811 ; GFX12-CU: ; %bb.0: ; %entry
14812 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
14813 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14814 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
14815 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
14816 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
14821 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14832 ; GFX6: ; %bb.0: ; %entry
14834 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
14835 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
14836 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
14837 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
14840 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
14850 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
14854 ; GFX7: ; %bb.0: ; %entry
14856 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
14857 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
14858 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
14860 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
14875 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
14879 ; GFX10-WGP: ; %bb.0: ; %entry
14880 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
14881 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14882 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
14883 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
14884 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
14893 ; GFX10-CU: ; %bb.0: ; %entry
14894 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
14895 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14896 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
14897 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
14898 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
14907 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
14909 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
14910 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
14911 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
14912 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
14915 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
14925 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
14929 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
14930 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14931 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14932 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
14933 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
14934 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14943 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
14944 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14945 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
14946 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
14947 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
14948 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14957 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
14958 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14959 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
14960 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
14961 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
14962 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14967 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
14971 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
14972 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
14973 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
14974 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
14975 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
14976 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
14981 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
14985 ; GFX11-WGP: ; %bb.0: ; %entry
14986 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
14987 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14988 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
14989 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
14990 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
14995 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14999 ; GFX11-CU: ; %bb.0: ; %entry
15000 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
15001 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15002 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
15003 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
15004 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
15009 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15013 ; GFX12-WGP: ; %bb.0: ; %entry
15014 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
15015 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15016 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
15017 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
15018 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
15023 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15027 ; GFX12-CU: ; %bb.0: ; %entry
15028 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
15029 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15030 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
15031 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
15032 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
15037 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15048 ; GFX6: ; %bb.0: ; %entry
15050 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
15051 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
15052 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
15053 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
15056 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
15066 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
15070 ; GFX7: ; %bb.0: ; %entry
15072 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
15073 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
15074 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
15076 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
15091 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
15095 ; GFX10-WGP: ; %bb.0: ; %entry
15096 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
15097 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
15098 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
15099 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
15100 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
15109 ; GFX10-CU: ; %bb.0: ; %entry
15110 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
15111 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
15112 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
15113 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
15114 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
15123 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
15125 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
15126 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
15127 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
15128 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
15131 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
15141 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
15145 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
15146 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
15147 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
15148 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
15149 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
15150 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
15159 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
15160 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
15161 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
15162 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
15163 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
15164 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
15173 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
15174 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
15175 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
15176 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
15177 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
15178 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
15183 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
15187 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
15188 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
15189 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
15190 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
15191 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
15192 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
15197 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
15201 ; GFX11-WGP: ; %bb.0: ; %entry
15202 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
15203 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15204 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
15205 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
15206 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
15211 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15215 ; GFX11-CU: ; %bb.0: ; %entry
15216 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
15217 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15218 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
15219 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
15220 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
15225 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15229 ; GFX12-WGP: ; %bb.0: ; %entry
15230 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
15231 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15232 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
15233 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
15234 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
15239 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15243 ; GFX12-CU: ; %bb.0: ; %entry
15244 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
15245 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15246 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
15247 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
15248 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
15253 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15264 ; GFX6: ; %bb.0: ; %entry
15266 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
15267 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
15268 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
15269 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
15272 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
15282 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
15286 ; GFX7: ; %bb.0: ; %entry
15288 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
15289 ; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
15290 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
15292 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
15307 ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
15311 ; GFX10-WGP: ; %bb.0: ; %entry
15312 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
15313 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
15314 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
15315 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
15316 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
15325 ; GFX10-CU: ; %bb.0: ; %entry
15326 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
15327 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
15328 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
15329 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
15330 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
15339 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
15341 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
15342 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
15343 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
15344 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
15347 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
15357 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
15361 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
15362 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
15363 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
15364 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
15365 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
15366 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
15375 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
15376 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
15377 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
15378 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
15379 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
15380 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
15389 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
15390 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
15391 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
15392 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
15393 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
15394 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
15399 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
15403 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
15404 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
15405 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
15406 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
15407 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
15408 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
15413 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
15417 ; GFX11-WGP: ; %bb.0: ; %entry
15418 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
15419 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15420 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
15421 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
15422 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
15427 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15431 ; GFX11-CU: ; %bb.0: ; %entry
15432 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
15433 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15434 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
15435 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
15436 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
15441 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15445 ; GFX12-WGP: ; %bb.0: ; %entry
15446 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
15447 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15448 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
15449 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
15450 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
15455 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15459 ; GFX12-CU: ; %bb.0: ; %entry
15460 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
15461 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15462 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
15463 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
15464 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
15469 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15480 ; GFX6: ; %bb.0: ; %entry
15482 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
15483 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
15484 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
15485 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
15488 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
15498 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
15500 ; GFX6-NEXT: s_waitcnt vmcnt(0)
15501 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
15505 ; GFX7: ; %bb.0: ; %entry
15507 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
15508 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
15509 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
15511 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
15526 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
15529 ; GFX7-NEXT: s_waitcnt vmcnt(0)
15530 ; GFX7-NEXT: flat_store_dword v[0:1], v2
15534 ; GFX10-WGP: ; %bb.0: ; %entry
15535 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
15536 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
15537 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
15538 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
15539 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
15545 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
15550 ; GFX10-CU: ; %bb.0: ; %entry
15551 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
15552 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
15553 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
15554 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
15555 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
15561 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
15566 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
15568 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
15569 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
15570 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
15571 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
15574 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
15584 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
15586 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
15587 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
15591 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
15592 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
15593 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
15594 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
15595 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
15596 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
15602 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
15607 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
15608 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
15609 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
15610 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
15611 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
15612 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
15618 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
15623 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
15624 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
15625 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
15626 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
15627 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
15628 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
15633 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
15634 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
15635 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
15639 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
15640 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
15641 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
15642 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
15643 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
15644 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
15649 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
15650 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
15651 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
15655 ; GFX11-WGP: ; %bb.0: ; %entry
15656 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
15657 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15658 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
15659 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
15660 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
15665 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
15666 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
15667 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
15671 ; GFX11-CU: ; %bb.0: ; %entry
15672 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
15673 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15674 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
15675 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
15676 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
15681 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
15682 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
15683 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
15687 ; GFX12-WGP: ; %bb.0: ; %entry
15688 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
15689 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15690 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
15691 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
15692 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
15697 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
15698 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
15699 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
15703 ; GFX12-CU: ; %bb.0: ; %entry
15704 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
15705 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15706 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
15707 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
15708 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
15713 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
15714 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
15715 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
15721 %val0 = extractvalue { i32, i1 } %val, 0
15728 ; GFX6: ; %bb.0: ; %entry
15730 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
15731 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
15732 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
15733 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
15736 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
15746 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
15748 ; GFX6-NEXT: s_waitcnt vmcnt(0)
15749 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
15753 ; GFX7: ; %bb.0: ; %entry
15755 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
15756 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
15757 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
15759 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
15774 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
15777 ; GFX7-NEXT: s_waitcnt vmcnt(0)
15778 ; GFX7-NEXT: flat_store_dword v[0:1], v2
15782 ; GFX10-WGP: ; %bb.0: ; %entry
15783 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
15784 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
15785 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
15786 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
15787 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
15793 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
15798 ; GFX10-CU: ; %bb.0: ; %entry
15799 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
15800 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
15801 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
15802 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
15803 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
15809 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
15814 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
15816 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
15817 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
15818 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
15819 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
15822 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
15832 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
15834 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
15835 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
15839 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
15840 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
15841 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
15842 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
15843 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
15844 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
15850 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
15855 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
15856 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
15857 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
15858 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
15859 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
15860 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
15866 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
15871 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
15872 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
15873 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
15874 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
15875 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
15876 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
15881 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
15882 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
15883 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
15887 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
15888 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
15889 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
15890 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
15891 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
15892 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
15897 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
15898 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
15899 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
15903 ; GFX11-WGP: ; %bb.0: ; %entry
15904 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
15905 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15906 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
15907 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
15908 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
15913 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
15914 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
15915 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
15919 ; GFX11-CU: ; %bb.0: ; %entry
15920 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
15921 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15922 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
15923 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
15924 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
15929 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
15930 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
15931 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
15935 ; GFX12-WGP: ; %bb.0: ; %entry
15936 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
15937 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15938 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
15939 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
15940 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
15945 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
15946 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
15947 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
15951 ; GFX12-CU: ; %bb.0: ; %entry
15952 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
15953 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15954 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
15955 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
15956 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
15961 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
15962 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
15963 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
15969 %val0 = extractvalue { i32, i1 } %val, 0
15976 ; GFX6: ; %bb.0: ; %entry
15978 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
15979 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
15980 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
15981 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
15984 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
15994 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
15996 ; GFX6-NEXT: s_waitcnt vmcnt(0)
15997 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
16001 ; GFX7: ; %bb.0: ; %entry
16003 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
16004 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
16005 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
16007 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
16022 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
16025 ; GFX7-NEXT: s_waitcnt vmcnt(0)
16026 ; GFX7-NEXT: flat_store_dword v[0:1], v2
16030 ; GFX10-WGP: ; %bb.0: ; %entry
16031 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
16032 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
16033 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
16034 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
16035 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
16041 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
16046 ; GFX10-CU: ; %bb.0: ; %entry
16047 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
16048 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
16049 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
16050 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
16051 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
16057 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
16062 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
16064 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
16065 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
16066 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
16067 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
16070 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
16080 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
16082 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
16083 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
16087 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
16088 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
16089 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
16090 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
16091 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
16092 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
16098 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
16103 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
16104 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
16105 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
16106 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
16107 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
16108 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
16114 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
16119 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
16120 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
16121 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
16122 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
16123 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
16124 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
16129 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
16130 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
16131 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
16135 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
16136 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
16137 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
16138 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
16139 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
16140 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
16145 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
16146 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
16147 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
16151 ; GFX11-WGP: ; %bb.0: ; %entry
16152 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
16153 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
16154 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
16155 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
16156 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
16161 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
16162 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
16163 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
16167 ; GFX11-CU: ; %bb.0: ; %entry
16168 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
16169 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
16170 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
16171 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
16172 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
16177 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
16178 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
16179 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
16183 ; GFX12-WGP: ; %bb.0: ; %entry
16184 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
16185 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
16186 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
16187 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
16188 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
16193 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
16194 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
16195 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
16199 ; GFX12-CU: ; %bb.0: ; %entry
16200 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
16201 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
16202 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
16203 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
16204 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
16209 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
16210 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
16211 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
16217 %val0 = extractvalue { i32, i1 } %val, 0
16224 ; GFX6: ; %bb.0: ; %entry
16226 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
16227 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
16228 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
16229 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
16232 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
16242 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
16244 ; GFX6-NEXT: s_waitcnt vmcnt(0)
16245 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
16249 ; GFX7: ; %bb.0: ; %entry
16251 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
16252 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
16253 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
16255 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
16270 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
16273 ; GFX7-NEXT: s_waitcnt vmcnt(0)
16274 ; GFX7-NEXT: flat_store_dword v[0:1], v2
16278 ; GFX10-WGP: ; %bb.0: ; %entry
16279 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
16280 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
16281 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
16282 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
16283 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
16289 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
16294 ; GFX10-CU: ; %bb.0: ; %entry
16295 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
16296 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
16297 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
16298 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
16299 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
16305 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
16310 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
16312 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
16313 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
16314 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
16315 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
16318 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
16328 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
16330 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
16331 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
16335 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
16336 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
16337 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
16338 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
16339 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
16340 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
16346 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
16351 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
16352 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
16353 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
16354 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
16355 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
16356 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
16362 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
16367 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
16368 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
16369 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
16370 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
16371 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
16372 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
16377 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
16378 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
16379 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
16383 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
16384 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
16385 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
16386 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
16387 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
16388 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
16393 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
16394 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
16395 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
16399 ; GFX11-WGP: ; %bb.0: ; %entry
16400 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
16401 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
16402 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
16403 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
16404 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
16409 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
16410 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
16411 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
16415 ; GFX11-CU: ; %bb.0: ; %entry
16416 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
16417 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
16418 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
16419 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
16420 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
16425 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
16426 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
16427 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
16431 ; GFX12-WGP: ; %bb.0: ; %entry
16432 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
16433 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
16434 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
16435 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
16436 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
16441 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
16442 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
16443 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
16447 ; GFX12-CU: ; %bb.0: ; %entry
16448 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
16449 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
16450 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
16451 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
16452 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
16457 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
16458 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
16459 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
16465 %val0 = extractvalue { i32, i1 } %val, 0
16472 ; GFX6: ; %bb.0: ; %entry
16474 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
16475 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
16476 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
16477 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
16480 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
16490 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
16492 ; GFX6-NEXT: s_waitcnt vmcnt(0)
16493 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
16497 ; GFX7: ; %bb.0: ; %entry
16499 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
16500 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
16501 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
16503 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
16518 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
16521 ; GFX7-NEXT: s_waitcnt vmcnt(0)
16522 ; GFX7-NEXT: flat_store_dword v[0:1], v2
16526 ; GFX10-WGP: ; %bb.0: ; %entry
16527 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
16528 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
16529 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
16530 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
16531 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
16537 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
16542 ; GFX10-CU: ; %bb.0: ; %entry
16543 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
16544 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
16545 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
16546 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
16547 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
16553 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
16558 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
16560 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
16561 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
16562 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
16563 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
16566 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
16576 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
16578 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
16579 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
16583 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
16584 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
16585 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
16586 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
16587 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
16588 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
16594 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
16599 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
16600 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
16601 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
16602 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
16603 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
16604 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
16610 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
16615 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
16616 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
16617 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
16618 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
16619 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
16620 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
16625 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
16626 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
16627 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
16631 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
16632 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
16633 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
16634 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
16635 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
16636 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
16641 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
16642 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
16643 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
16647 ; GFX11-WGP: ; %bb.0: ; %entry
16648 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
16649 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
16650 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
16651 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
16652 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
16657 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
16658 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
16659 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
16663 ; GFX11-CU: ; %bb.0: ; %entry
16664 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
16665 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
16666 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
16667 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
16668 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
16673 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
16674 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
16675 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
16679 ; GFX12-WGP: ; %bb.0: ; %entry
16680 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
16681 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
16682 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
16683 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
16684 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
16689 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
16690 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
16691 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
16695 ; GFX12-CU: ; %bb.0: ; %entry
16696 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
16697 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
16698 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
16699 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
16700 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
16705 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
16706 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
16707 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
16713 %val0 = extractvalue { i32, i1 } %val, 0
16720 ; GFX6: ; %bb.0: ; %entry
16722 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
16723 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
16724 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
16725 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
16728 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
16738 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
16740 ; GFX6-NEXT: s_waitcnt vmcnt(0)
16741 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
16745 ; GFX7: ; %bb.0: ; %entry
16747 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
16748 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
16749 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
16751 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
16766 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
16769 ; GFX7-NEXT: s_waitcnt vmcnt(0)
16770 ; GFX7-NEXT: flat_store_dword v[0:1], v2
16774 ; GFX10-WGP: ; %bb.0: ; %entry
16775 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
16776 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
16777 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
16778 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
16779 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
16785 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
16790 ; GFX10-CU: ; %bb.0: ; %entry
16791 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
16792 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
16793 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
16794 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
16795 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
16801 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
16806 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
16808 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
16809 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
16810 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
16811 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
16814 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
16824 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
16826 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
16827 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
16831 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
16832 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
16833 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
16834 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
16835 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
16836 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
16842 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
16847 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
16848 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
16849 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
16850 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
16851 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
16852 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
16858 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
16863 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
16864 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
16865 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
16866 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
16867 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
16868 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
16873 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
16874 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
16875 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
16879 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
16880 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
16881 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
16882 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
16883 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
16884 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
16889 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
16890 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
16891 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
16895 ; GFX11-WGP: ; %bb.0: ; %entry
16896 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
16897 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
16898 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
16899 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
16900 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
16905 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
16906 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
16907 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
16911 ; GFX11-CU: ; %bb.0: ; %entry
16912 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
16913 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
16914 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
16915 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
16916 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
16921 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
16922 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
16923 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
16927 ; GFX12-WGP: ; %bb.0: ; %entry
16928 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
16929 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
16930 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
16931 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
16932 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
16937 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
16938 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
16939 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
16943 ; GFX12-CU: ; %bb.0: ; %entry
16944 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
16945 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
16946 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
16947 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
16948 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
16953 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
16954 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
16955 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
16961 %val0 = extractvalue { i32, i1 } %val, 0
16968 ; GFX6: ; %bb.0: ; %entry
16970 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
16971 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
16972 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
16973 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
16976 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
16986 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
16988 ; GFX6-NEXT: s_waitcnt vmcnt(0)
16989 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
16993 ; GFX7: ; %bb.0: ; %entry
16995 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
16996 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
16997 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
16999 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
17014 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
17017 ; GFX7-NEXT: s_waitcnt vmcnt(0)
17018 ; GFX7-NEXT: flat_store_dword v[0:1], v2
17022 ; GFX10-WGP: ; %bb.0: ; %entry
17023 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
17024 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
17025 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
17026 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
17027 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
17033 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
17038 ; GFX10-CU: ; %bb.0: ; %entry
17039 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
17040 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
17041 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
17042 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
17043 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
17049 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
17054 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
17056 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
17057 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
17058 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
17059 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
17062 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
17072 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
17074 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
17075 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
17079 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
17080 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
17081 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
17082 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
17083 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
17084 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
17090 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
17095 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
17096 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
17097 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
17098 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
17099 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
17100 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
17106 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
17111 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
17112 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
17113 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
17114 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
17115 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
17116 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
17121 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
17122 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
17123 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
17127 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
17128 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
17129 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
17130 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
17131 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
17132 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
17137 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
17138 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
17139 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
17143 ; GFX11-WGP: ; %bb.0: ; %entry
17144 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
17145 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17146 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
17147 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
17148 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
17153 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
17154 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
17155 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
17159 ; GFX11-CU: ; %bb.0: ; %entry
17160 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
17161 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17162 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
17163 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
17164 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
17169 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
17170 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
17171 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
17175 ; GFX12-WGP: ; %bb.0: ; %entry
17176 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
17177 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17178 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
17179 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
17180 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
17185 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
17186 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
17187 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
17191 ; GFX12-CU: ; %bb.0: ; %entry
17192 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
17193 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17194 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
17195 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
17196 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
17201 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
17202 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
17203 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
17209 %val0 = extractvalue { i32, i1 } %val, 0
17216 ; GFX6: ; %bb.0: ; %entry
17218 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
17219 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
17220 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
17221 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
17224 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
17234 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
17236 ; GFX6-NEXT: s_waitcnt vmcnt(0)
17237 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
17241 ; GFX7: ; %bb.0: ; %entry
17243 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
17244 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
17245 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
17247 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
17262 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
17265 ; GFX7-NEXT: s_waitcnt vmcnt(0)
17266 ; GFX7-NEXT: flat_store_dword v[0:1], v2
17270 ; GFX10-WGP: ; %bb.0: ; %entry
17271 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
17272 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
17273 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
17274 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
17275 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
17281 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
17286 ; GFX10-CU: ; %bb.0: ; %entry
17287 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
17288 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
17289 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
17290 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
17291 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
17297 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
17302 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
17304 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
17305 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
17306 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
17307 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
17310 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
17320 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
17322 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
17323 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
17327 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
17328 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
17329 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
17330 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
17331 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
17332 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
17338 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
17343 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
17344 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
17345 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
17346 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
17347 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
17348 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
17354 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
17359 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
17360 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
17361 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
17362 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
17363 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
17364 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
17369 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
17370 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
17371 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
17375 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
17376 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
17377 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
17378 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
17379 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
17380 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
17385 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
17386 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
17387 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
17391 ; GFX11-WGP: ; %bb.0: ; %entry
17392 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
17393 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17394 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
17395 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
17396 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
17401 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
17402 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
17403 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
17407 ; GFX11-CU: ; %bb.0: ; %entry
17408 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
17409 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17410 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
17411 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
17412 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
17417 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
17418 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
17419 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
17423 ; GFX12-WGP: ; %bb.0: ; %entry
17424 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
17425 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17426 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
17427 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
17428 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
17433 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
17434 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
17435 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
17439 ; GFX12-CU: ; %bb.0: ; %entry
17440 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
17441 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17442 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
17443 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
17444 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
17449 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
17450 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
17451 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
17457 %val0 = extractvalue { i32, i1 } %val, 0
17464 ; GFX6: ; %bb.0: ; %entry
17466 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
17467 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
17468 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
17469 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
17472 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
17482 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
17484 ; GFX6-NEXT: s_waitcnt vmcnt(0)
17485 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
17489 ; GFX7: ; %bb.0: ; %entry
17491 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
17492 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
17493 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
17495 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
17510 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
17513 ; GFX7-NEXT: s_waitcnt vmcnt(0)
17514 ; GFX7-NEXT: flat_store_dword v[0:1], v2
17518 ; GFX10-WGP: ; %bb.0: ; %entry
17519 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
17520 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
17521 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
17522 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
17523 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
17529 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
17534 ; GFX10-CU: ; %bb.0: ; %entry
17535 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
17536 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
17537 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
17538 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
17539 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
17545 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
17550 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
17552 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
17553 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
17554 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
17555 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
17558 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
17568 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
17570 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
17571 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
17575 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
17576 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
17577 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
17578 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
17579 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
17580 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
17586 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
17591 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
17592 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
17593 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
17594 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
17595 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
17596 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
17602 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
17607 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
17608 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
17609 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
17610 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
17611 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
17612 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
17617 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
17618 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
17619 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
17623 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
17624 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
17625 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
17626 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
17627 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
17628 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
17633 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
17634 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
17635 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
17639 ; GFX11-WGP: ; %bb.0: ; %entry
17640 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
17641 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17642 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
17643 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
17644 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
17649 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
17650 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
17651 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
17655 ; GFX11-CU: ; %bb.0: ; %entry
17656 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
17657 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17658 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
17659 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
17660 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
17665 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
17666 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
17667 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
17671 ; GFX12-WGP: ; %bb.0: ; %entry
17672 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
17673 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17674 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
17675 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
17676 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
17681 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
17682 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
17683 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
17687 ; GFX12-CU: ; %bb.0: ; %entry
17688 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
17689 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17690 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
17691 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
17692 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
17697 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
17698 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
17699 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
17705 %val0 = extractvalue { i32, i1 } %val, 0
17712 ; GFX6: ; %bb.0: ; %entry
17714 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
17715 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
17716 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
17717 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
17720 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
17730 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
17732 ; GFX6-NEXT: s_waitcnt vmcnt(0)
17733 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
17737 ; GFX7: ; %bb.0: ; %entry
17739 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
17740 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
17741 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
17743 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
17758 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
17761 ; GFX7-NEXT: s_waitcnt vmcnt(0)
17762 ; GFX7-NEXT: flat_store_dword v[0:1], v2
17766 ; GFX10-WGP: ; %bb.0: ; %entry
17767 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
17768 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
17769 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
17770 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
17771 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
17777 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
17782 ; GFX10-CU: ; %bb.0: ; %entry
17783 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
17784 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
17785 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
17786 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
17787 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
17793 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
17798 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
17800 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
17801 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
17802 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
17803 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
17806 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
17816 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
17818 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
17819 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
17823 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
17824 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
17825 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
17826 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
17827 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
17828 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
17834 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
17839 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
17840 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
17841 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
17842 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
17843 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
17844 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
17850 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
17855 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
17856 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
17857 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
17858 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
17859 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
17860 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
17865 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
17866 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
17867 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
17871 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
17872 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
17873 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
17874 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
17875 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
17876 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
17881 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
17882 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
17883 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
17887 ; GFX11-WGP: ; %bb.0: ; %entry
17888 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
17889 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17890 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
17891 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
17892 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
17897 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
17898 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
17899 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
17903 ; GFX11-CU: ; %bb.0: ; %entry
17904 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
17905 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17906 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
17907 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
17908 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
17913 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
17914 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
17915 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
17919 ; GFX12-WGP: ; %bb.0: ; %entry
17920 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
17921 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17922 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
17923 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
17924 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
17929 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
17930 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
17931 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
17935 ; GFX12-CU: ; %bb.0: ; %entry
17936 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
17937 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17938 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
17939 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
17940 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
17945 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
17946 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
17947 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
17953 %val0 = extractvalue { i32, i1 } %val, 0
17960 ; GFX6: ; %bb.0: ; %entry
17962 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
17963 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
17964 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
17965 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
17968 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
17978 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
17980 ; GFX6-NEXT: s_waitcnt vmcnt(0)
17981 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
17985 ; GFX7: ; %bb.0: ; %entry
17987 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
17988 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
17989 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
17991 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
18006 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
18009 ; GFX7-NEXT: s_waitcnt vmcnt(0)
18010 ; GFX7-NEXT: flat_store_dword v[0:1], v2
18014 ; GFX10-WGP: ; %bb.0: ; %entry
18015 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
18016 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
18017 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
18018 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
18019 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
18025 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
18030 ; GFX10-CU: ; %bb.0: ; %entry
18031 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
18032 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
18033 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
18034 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
18035 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
18041 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
18046 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
18048 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
18049 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
18050 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
18051 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
18054 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
18064 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
18066 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
18067 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
18071 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
18072 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
18073 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
18074 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
18075 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
18076 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
18082 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
18087 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
18088 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
18089 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
18090 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
18091 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
18092 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
18098 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
18103 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
18104 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
18105 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
18106 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
18107 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
18108 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
18113 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
18114 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
18115 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
18119 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
18120 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
18121 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
18122 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
18123 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
18124 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
18129 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
18130 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
18131 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
18135 ; GFX11-WGP: ; %bb.0: ; %entry
18136 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
18137 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
18138 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
18139 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
18140 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
18145 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18146 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
18147 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
18151 ; GFX11-CU: ; %bb.0: ; %entry
18152 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
18153 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
18154 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
18155 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
18156 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
18161 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18162 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
18163 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
18167 ; GFX12-WGP: ; %bb.0: ; %entry
18168 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
18169 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
18170 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
18171 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
18172 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
18177 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
18178 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
18179 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
18183 ; GFX12-CU: ; %bb.0: ; %entry
18184 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
18185 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
18186 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
18187 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
18188 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
18193 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
18194 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
18195 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
18201 %val0 = extractvalue { i32, i1 } %val, 0
18208 ; GFX6: ; %bb.0: ; %entry
18210 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
18211 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
18212 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
18213 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
18216 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
18226 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
18228 ; GFX6-NEXT: s_waitcnt vmcnt(0)
18229 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
18233 ; GFX7: ; %bb.0: ; %entry
18235 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
18236 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
18237 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
18239 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
18254 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
18257 ; GFX7-NEXT: s_waitcnt vmcnt(0)
18258 ; GFX7-NEXT: flat_store_dword v[0:1], v2
18262 ; GFX10-WGP: ; %bb.0: ; %entry
18263 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
18264 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
18265 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
18266 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
18267 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
18273 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
18278 ; GFX10-CU: ; %bb.0: ; %entry
18279 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
18280 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
18281 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
18282 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
18283 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
18289 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
18294 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
18296 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
18297 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
18298 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
18299 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
18302 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
18312 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
18314 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
18315 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
18319 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
18320 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
18321 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
18322 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
18323 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
18324 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
18330 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
18335 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
18336 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
18337 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
18338 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
18339 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
18340 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
18346 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
18351 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
18352 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
18353 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
18354 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
18355 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
18356 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
18361 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
18362 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
18363 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
18367 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
18368 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
18369 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
18370 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
18371 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
18372 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
18377 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
18378 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
18379 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
18383 ; GFX11-WGP: ; %bb.0: ; %entry
18384 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
18385 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
18386 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
18387 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
18388 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
18393 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18394 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
18395 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
18399 ; GFX11-CU: ; %bb.0: ; %entry
18400 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
18401 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
18402 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
18403 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
18404 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
18409 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18410 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
18411 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
18415 ; GFX12-WGP: ; %bb.0: ; %entry
18416 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
18417 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
18418 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
18419 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
18420 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
18425 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
18426 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
18427 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
18431 ; GFX12-CU: ; %bb.0: ; %entry
18432 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
18433 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
18434 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
18435 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
18436 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
18441 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
18442 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
18443 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
18449 %val0 = extractvalue { i32, i1 } %val, 0
18456 ; GFX6: ; %bb.0: ; %entry
18458 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
18459 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
18460 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
18461 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
18464 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
18474 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
18476 ; GFX6-NEXT: s_waitcnt vmcnt(0)
18477 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
18481 ; GFX7: ; %bb.0: ; %entry
18483 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
18484 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
18485 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
18487 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
18502 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
18505 ; GFX7-NEXT: s_waitcnt vmcnt(0)
18506 ; GFX7-NEXT: flat_store_dword v[0:1], v2
18510 ; GFX10-WGP: ; %bb.0: ; %entry
18511 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
18512 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
18513 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
18514 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
18515 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
18521 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
18526 ; GFX10-CU: ; %bb.0: ; %entry
18527 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
18528 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
18529 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
18530 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
18531 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
18537 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
18542 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
18544 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
18545 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
18546 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
18547 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
18550 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
18560 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
18562 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
18563 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
18567 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
18568 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
18569 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
18570 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
18571 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
18572 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
18578 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
18583 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
18584 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
18585 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
18586 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
18587 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
18588 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
18594 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
18599 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
18600 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
18601 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
18602 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
18603 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
18604 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
18609 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
18610 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
18611 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
18615 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
18616 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
18617 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
18618 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
18619 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
18620 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
18625 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
18626 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
18627 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
18631 ; GFX11-WGP: ; %bb.0: ; %entry
18632 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
18633 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
18634 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
18635 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
18636 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
18641 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18642 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
18643 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
18647 ; GFX11-CU: ; %bb.0: ; %entry
18648 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
18649 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
18650 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
18651 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
18652 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
18657 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18658 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
18659 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
18663 ; GFX12-WGP: ; %bb.0: ; %entry
18664 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
18665 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
18666 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
18667 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
18668 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
18673 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
18674 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
18675 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
18679 ; GFX12-CU: ; %bb.0: ; %entry
18680 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
18681 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
18682 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
18683 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
18684 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
18689 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
18690 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
18691 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
18697 %val0 = extractvalue { i32, i1 } %val, 0
18704 ; GFX6: ; %bb.0: ; %entry
18706 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
18707 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
18708 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
18709 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
18712 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
18722 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
18724 ; GFX6-NEXT: s_waitcnt vmcnt(0)
18725 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
18729 ; GFX7: ; %bb.0: ; %entry
18731 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
18732 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
18733 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
18735 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
18750 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
18753 ; GFX7-NEXT: s_waitcnt vmcnt(0)
18754 ; GFX7-NEXT: flat_store_dword v[0:1], v2
18758 ; GFX10-WGP: ; %bb.0: ; %entry
18759 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
18760 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
18761 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
18762 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
18763 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
18769 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
18774 ; GFX10-CU: ; %bb.0: ; %entry
18775 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
18776 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
18777 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
18778 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
18779 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
18785 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
18790 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
18792 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
18793 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
18794 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
18795 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
18798 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
18808 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
18810 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
18811 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
18815 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
18816 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
18817 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
18818 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
18819 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
18820 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
18826 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
18831 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
18832 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
18833 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
18834 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
18835 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
18836 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
18842 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
18847 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
18848 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
18849 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
18850 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
18851 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
18852 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
18857 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
18858 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
18859 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
18863 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
18864 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
18865 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
18866 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
18867 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
18868 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
18873 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
18874 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
18875 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
18879 ; GFX11-WGP: ; %bb.0: ; %entry
18880 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
18881 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
18882 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
18883 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
18884 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
18889 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18890 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
18891 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
18895 ; GFX11-CU: ; %bb.0: ; %entry
18896 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
18897 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
18898 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
18899 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
18900 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
18905 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18906 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
18907 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
18911 ; GFX12-WGP: ; %bb.0: ; %entry
18912 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
18913 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
18914 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
18915 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
18916 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
18921 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
18922 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
18923 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
18927 ; GFX12-CU: ; %bb.0: ; %entry
18928 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
18929 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
18930 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
18931 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
18932 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
18937 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
18938 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
18939 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
18945 %val0 = extractvalue { i32, i1 } %val, 0
18952 ; GFX6: ; %bb.0: ; %entry
18954 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
18955 ; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
18956 ; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
18957 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
18960 ; GFX6-NEXT: s_mov_b32 s10, 0x100f000
18970 ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
18972 ; GFX6-NEXT: s_waitcnt vmcnt(0)
18973 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
18977 ; GFX7: ; %bb.0: ; %entry
18979 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
18980 ; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
18981 ; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
18983 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
18998 ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
19001 ; GFX7-NEXT: s_waitcnt vmcnt(0)
19002 ; GFX7-NEXT: flat_store_dword v[0:1], v2
19006 ; GFX10-WGP: ; %bb.0: ; %entry
19007 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
19008 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
19009 ; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
19010 ; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
19011 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
19017 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
19022 ; GFX10-CU: ; %bb.0: ; %entry
19023 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
19024 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
19025 ; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
19026 ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
19027 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
19033 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
19038 ; SKIP-CACHE-INV: ; %bb.0: ; %entry
19040 ; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
19041 ; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
19042 ; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
19043 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
19046 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
19056 ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
19058 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
19059 ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
19063 ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
19064 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
19065 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
19066 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
19067 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
19068 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
19074 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
19079 ; GFX90A-TGSPLIT: ; %bb.0: ; %entry
19080 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
19081 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
19082 ; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
19083 ; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
19084 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
19090 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
19095 ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
19096 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
19097 ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
19098 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
19099 ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
19100 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
19105 ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
19106 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
19107 ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
19111 ; GFX940-TGSPLIT: ; %bb.0: ; %entry
19112 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
19113 ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
19114 ; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
19115 ; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
19116 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
19121 ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
19122 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
19123 ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
19127 ; GFX11-WGP: ; %bb.0: ; %entry
19128 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
19129 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
19130 ; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
19131 ; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
19132 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
19137 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
19138 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
19139 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
19143 ; GFX11-CU: ; %bb.0: ; %entry
19144 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
19145 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
19146 ; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
19147 ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
19148 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
19153 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
19154 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
19155 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
19159 ; GFX12-WGP: ; %bb.0: ; %entry
19160 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
19161 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
19162 ; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
19163 ; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
19164 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
19169 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
19170 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
19171 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
19175 ; GFX12-CU: ; %bb.0: ; %entry
19176 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
19177 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
19178 ; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
19179 ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
19180 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0
19185 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
19186 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0
19187 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
19193 %val0 = extractvalue { i32, i1 } %val, 0